forge/app/Console/Commands/ImportWoltlabData.php

468 lines
18 KiB
PHP
Raw Normal View History

2024-05-20 02:19:02 -04:00
<?php
namespace App\Console\Commands;
2024-05-21 21:02:49 -04:00
use App\Models\License;
use App\Models\Mod;
2024-05-22 01:00:37 -04:00
use App\Models\ModVersion;
use App\Models\SptVersion;
2024-05-20 02:19:02 -04:00
use App\Models\User;
use Carbon\Carbon;
use Illuminate\Console\Command;
use Illuminate\Support\Benchmark;
2024-05-20 02:19:02 -04:00
use Illuminate\Support\Collection;
use Illuminate\Support\Facades\DB;
2024-05-21 21:02:49 -04:00
use Illuminate\Support\Facades\Storage;
use Illuminate\Support\Str;
2024-06-01 23:04:06 -04:00
use League\HTMLToMarkdown\HtmlConverter;
use Stevebauman\Purify\Facades\Purify;
2024-05-20 02:19:02 -04:00
class ImportWoltlabData extends Command
{
/** Console signature used to invoke this command. */
protected $signature = 'app:import-woltlab-data';
/** Description text for this command. */
protected $description = 'Connects to the Woltlab database and imports the data into the Laravel database.';
2024-05-22 01:00:37 -04:00
/** Rows from `filebase1_file_option_value`, grouped into lists keyed by fileID. */
protected array $fileOptionValues = [];
/** Rows from `filebase1_file_content` (fileID, subject, teaser, message), keyed by fileID. */
protected array $fileContent = [];
/** Data loaded from `filebase1_file_version_content`, keyed by versionID. */
protected array $fileVersionContent = [];
/** Map of objectID => labelID taken from `wcf1_label_object` rows with objectTypeID 387. */
protected array $fileVersionLabels = [];
2024-05-20 02:19:02 -04:00
/**
 * Run every import stage in sequence, printing how long each stage and the whole run took.
 */
public function handle(): void
{
    $this->newLine();

    // Order matters: the mod import looks up imported users and licenses, and the
    // mod version import looks up imported mods and SPT versions.
    $stages = [
        'loadData',
        'importUsers',
        'importLicenses',
        'importSptVersions',
        'importMods',
        'importModVersions',
    ];

    $overall = Benchmark::value(function () use ($stages) {
        foreach ($stages as $stage) {
            // Benchmark::value() returns [callback result, elapsed milliseconds].
            $timing = Benchmark::value(fn () => $this->{$stage}());

            $this->info('Execution time: '.round($timing[1], 2).'ms');
            $this->newLine();
        }
    });

    $this->newLine();
    $this->info('Data imported successfully');
    $this->info('Total execution time: '.round($overall[1], 2).'ms');
}
/**
 * Preload the WoltLab lookup tables into the in-memory properties so the
 * import loops can read them without issuing a query per row.
 */
protected function loadData(): void
{
    $this->output->write('Loading data into memory... ');

    // Target property => loader method, executed in this order.
    $loaders = [
        'fileOptionValues' => 'getFileOptionValues',
        'fileContent' => 'getFileContent',
        'fileVersionLabels' => 'getFileVersionLabels',
        'fileVersionContent' => 'getFileVersionContent',
    ];

    foreach ($loaders as $property => $loader) {
        $this->{$property} = $this->{$loader}();
    }

    $this->info('Done.');
}
/**
 * Copy users from the WoltLab `wcf1_user` table into the local users table,
 * upserting on `hub_id` in chunks of 2500 rows.
 */
protected function importUsers(): void
{
    $runningTotal = 0;

    $source = DB::connection('mysql_woltlab')->table('wcf1_user');

    $source->chunkById(2500, function (Collection $users) use (&$runningTotal) {
        $rows = $users->map(function ($wolt) {
            $registeredAt = Carbon::parse($wolt->registrationDate, 'UTC');

            // Registration dates in the future are replaced with the current time.
            if ($registeredAt->isFuture()) {
                $registeredAt = now('UTC');
            }
            $registeredAt->setTimezone('UTC');

            return [
                'hub_id' => $wolt->userID,
                'name' => $wolt->username,
                'email' => mb_convert_case($wolt->email, MB_CASE_LOWER, 'UTF-8'),
                'password' => $this->cleanPasswordHash($wolt->password),
                'created_at' => $registeredAt,
                'updated_at' => now('UTC')->toDateTimeString(),
            ];
        })->values()->all();

        if ($rows === []) {
            return;
        }

        User::upsert($rows, ['hub_id'], ['name', 'email', 'password', 'created_at', 'updated_at']);

        $runningTotal += count($rows);
        $this->line('Processed '.count($rows).' users. Total processed so far: '.$runningTotal);
    }, 'userID');

    $this->info('Total users processed: '.$runningTotal);
}
2024-06-01 23:51:22 -04:00
/**
 * Strip the hash-type prefix that WoltLab sometimes stores in front of a password hash.
 *
 * Only a leading prefix is removed. The prefixes are tested longest-first so that
 * `cryptMD5::` is removed whole instead of being reduced to a stray `:` by the
 * shorter `cryptMD5:` match.
 *
 * @param  string  $password  The stored WoltLab password value.
 * @return string The value without a recognised leading prefix; unchanged if none matches.
 */
protected function cleanPasswordHash(string $password): string
{
    foreach (['cryptMD5::', 'cryptMD5:', 'Bcrypt:'] as $prefix) {
        if (str_starts_with($password, $prefix)) {
            return substr($password, strlen($prefix));
        }
    }

    return $password;
}
2024-05-21 21:02:49 -04:00
/**
 * Copy licenses from the WoltLab `filebase1_license` table into the local
 * `licenses` table, upserting on `hub_id` in chunks of 100 rows.
 */
protected function importLicenses(): void
{
    $runningTotal = 0;

    $source = DB::connection('mysql_woltlab')->table('filebase1_license');

    $source->chunkById(100, function (Collection $licenses) use (&$runningTotal) {
        $rows = $licenses->map(fn ($license) => [
            'hub_id' => $license->licenseID,
            'name' => $license->licenseName,
            'link' => $license->licenseURL,
        ])->values()->all();

        if ($rows === []) {
            return;
        }

        DB::table('licenses')->upsert($rows, ['hub_id'], ['name', 'link']);

        $runningTotal += count($rows);
        $this->line('Processed '.count($rows).' licenses. Total processed so far: '.$runningTotal);
    }, 'licenseID');

    $this->info('Total licenses processed: '.$runningTotal);
}
/**
 * Copy SPT versions from the WoltLab `wcf1_label` rows with groupID 1 into the
 * local `spt_versions` table, upserting on `hub_id` in chunks of 100 rows.
 */
protected function importSptVersions(): void
{
    $totalInserted = 0;

    DB::connection('mysql_woltlab')->table('wcf1_label')->where('groupID', 1)->chunkById(100, function (Collection $versions) use (&$totalInserted) {
        $insertData = [];

        foreach ($versions as $version) {
            $insertData[] = [
                'hub_id' => $version->labelID,
                'version' => $version->label,
                'color_class' => $this->translateColour($version->cssClassName),
            ];
        }

        if ($insertData === []) {
            return;
        }

        DB::table('spt_versions')->upsert($insertData, ['hub_id'], ['version', 'color_class']);

        $totalInserted += count($insertData);
        $this->line('Processed '.count($insertData).' SPT Versions. Total processed so far: '.$totalInserted);
    }, 'labelID');

    // Previously reported "licenses" here, copied over from importLicenses().
    $this->info('Total SPT versions processed: '.$totalInserted);
}
/**
 * Map a WoltLab label CSS class name to the colour class stored locally.
 *
 * @param  string  $colour  The source CSS class name.
 * @return string The mapped colour, or 'gray' when the class name is not recognised.
 */
protected function translateColour(string $colour = ''): string
{
    $palette = [
        'green' => 'green',
        'slightly-outdated' => 'lime',
        'yellow' => 'yellow',
        'red' => 'red',
    ];

    return $palette[$colour] ?? 'gray';
}
/**
 * Copy mods from the WoltLab `filebase1_file` table into the local mods table,
 * upserting on `hub_id` in chunks of 100 rows.
 *
 * Files without an entry in the preloaded version-label map are skipped. Thumbnails
 * are downloaded through a single cURL handle that is shared across all rows.
 */
protected function importMods(): void
{
    $totalInserted = 0;

    $curl = curl_init();
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);

    try {
        DB::connection('mysql_woltlab')->table('filebase1_file')->chunkById(100, function (Collection $mods) use ($curl, &$totalInserted) {
            $insertData = [];

            foreach ($mods as $mod) {
                $versionLabel = $this->fileVersionLabels[$mod->fileID] ?? null;
                if (empty($versionLabel)) {
                    continue;
                }

                $modContent = $this->fileContent[$mod->fileID] ?? null;
                $modOptions = $this->fileOptionValues[$mod->fileID] ?? [];

                $name = (string) ($modContent?->subject ?? '');
                $teaser = (string) ($modContent?->teaser ?? '');

                // Measure in characters, not bytes, so the check agrees with the
                // multibyte-aware truncation done by Str::take().
                if (mb_strlen($teaser) > 100) {
                    $teaser = Str::take($teaser, 97).'...';
                }

                $insertData[] = [
                    'hub_id' => (int) $mod->fileID,
                    'user_id' => User::whereHubId($mod->userID)->value('id'),
                    'name' => $name,
                    'slug' => Str::slug($name),
                    'teaser' => $teaser,
                    'description' => $this->convertModDescription($modContent?->message ?? ''),
                    // Cast explicitly: fetchModThumbnail() declares string parameters and
                    // would throw a TypeError if a source column were null.
                    'thumbnail' => $this->fetchModThumbnail($this, $curl, (string) $mod->fileID, (string) $mod->iconHash, (string) $mod->iconExtension),
                    'license_id' => License::whereHubId($mod->licenseID)->value('id'),
                    'source_code_link' => $this->fetchSourceLinkValue($modOptions),
                    'featured' => (bool) $mod->isFeatured,
                    'contains_ai_content' => $this->fetchContainsAiContentValue($modOptions),
                    'contains_ads' => $this->fetchContainsAdsValue($modOptions),
                    'disabled' => (bool) $mod->isDisabled,
                    'created_at' => Carbon::parse($mod->time, 'UTC'),
                    'updated_at' => Carbon::parse($mod->lastChangeTime, 'UTC'),
                ];
            }

            if ($insertData === []) {
                return;
            }

            // `contains_ads` is listed so that re-running the import refreshes it for
            // existing rows, like every other imported column.
            Mod::upsert($insertData, ['hub_id'], [
                'user_id',
                'name',
                'slug',
                'teaser',
                'description',
                'thumbnail',
                'license_id',
                'source_code_link',
                'featured',
                'contains_ai_content',
                'contains_ads',
                'disabled',
                'created_at',
                'updated_at',
            ]);

            $totalInserted += count($insertData);
            $this->line('Processed '.count($insertData).' mods. Total processed so far: '.$totalInserted);
        }, 'fileID');
    } finally {
        // Release the handle even if a chunk throws.
        curl_close($curl);
    }

    $this->info('Total mods processed: '.$totalInserted);
}
2024-05-22 01:00:37 -04:00
/**
 * Load every row of the WoltLab `filebase1_file_option_value` table.
 *
 * @return array Lists of option rows, keyed by fileID.
 */
protected function getFileOptionValues(): array
{
    $grouped = [];

    DB::connection('mysql_woltlab')
        ->table('filebase1_file_option_value')
        ->get()
        ->each(function ($row) use (&$grouped) {
            // Plain arrays are kept (rather than collections) because the
            // fetch*Value() helpers type-hint `array`.
            $grouped[$row->fileID][] = $row;
        });

    return $grouped;
}
/**
 * Load the subject, teaser and message of each file from the WoltLab
 * `filebase1_file_content` table, reading 200 rows at a time.
 *
 * @return array Content rows keyed by fileID; a later row for the same fileID replaces an earlier one.
 */
protected function getFileContent(): array
{
    $byFileId = [];

    $query = DB::connection('mysql_woltlab')
        ->table('filebase1_file_content')
        ->select(['fileID', 'subject', 'teaser', 'message'])
        ->orderBy('fileID', 'desc');

    $query->chunk(200, function ($rows) use (&$byFileId) {
        foreach ($rows as $row) {
            $byFileId[$row->fileID] = $row;
        }
    });

    return $byFileId;
}
/**
 * Find the source code link among a file's option rows.
 *
 * The link is stored in `optionValue` of the row with `optionID` 5 or 1. Option 5
 * takes precedence over option 1 regardless of the order of the rows; rows with an
 * empty value are ignored.
 *
 * @param  array  $options  Option rows exposing `optionID` and `optionValue`.
 * @return string The link, or an empty string when neither option has a value.
 */
protected function fetchSourceLinkValue(array $options): string
{
    $fallback = '';

    foreach ($options as $option) {
        if (empty($option->optionValue)) {
            continue;
        }

        if ($option->optionID == 5) {
            return (string) $option->optionValue;
        }

        // Remember the first option 1 value, but keep scanning for an option 5.
        if ($option->optionID == 1 && $fallback === '') {
            $fallback = (string) $option->optionValue;
        }
    }

    return $fallback;
}
2024-05-22 01:00:37 -04:00
/**
 * Read the AI-content flag from a file's option rows.
 *
 * @param  array  $options  Option rows exposing `optionID` and `optionValue`.
 * @return bool The boolean value of the first row with `optionID` 7; false when there is none.
 */
protected function fetchContainsAiContentValue(array $options): bool
{
    foreach ($options as $candidate) {
        if ($candidate->optionID != 7) {
            continue;
        }

        return (bool) $candidate->optionValue;
    }

    return false;
}
/**
 * Read the advertising flag from a file's option rows.
 *
 * @param  array  $options  Option rows exposing `optionID` and `optionValue`.
 * @return bool The boolean value of the first row with `optionID` 3; false when there is none.
 */
protected function fetchContainsAdsValue(array $options): bool
{
    foreach ($options as $candidate) {
        if ($candidate->optionID != 3) {
            continue;
        }

        return (bool) $candidate->optionValue;
    }

    return false;
}
2024-05-22 01:00:37 -04:00
/**
 * Download a mod's thumbnail from the hub into the public disk, unless it is already stored.
 *
 * @param  mixed  $command  Console command used for progress and error output.
 * @param  mixed  $curl  Shared cURL handle; its URL option is overwritten on every download.
 * @param  string  $fileID  Hub file ID, used as the remote file name.
 * @param  string  $thumbnailHash  Icon hash; its first two characters select the remote directory.
 * @param  string  $thumbnailExtension  Icon file extension.
 * @return string The `/storage/...` path of the stored thumbnail, or an empty string when
 *                an argument is empty or the download failed.
 */
protected function fetchModThumbnail($command, &$curl, string $fileID, string $thumbnailHash, string $thumbnailExtension): string
{
    if (empty($fileID) || empty($thumbnailHash) || empty($thumbnailExtension)) {
        return '';
    }

    // Only the first two characters of the icon hash.
    $hashShort = substr($thumbnailHash, 0, 2);

    $hubUrl = "https://hub.sp-tarkov.com/files/images/file/$hashShort/$fileID.$thumbnailExtension";
    $localPath = "mods/$thumbnailHash.$thumbnailExtension";

    // Check to make sure the image doesn't already exist.
    if (Storage::disk('public')->exists($localPath)) {
        return "/storage/$localPath";
    }

    $command->output->write("Downloading mod thumbnail: $hubUrl... ");

    curl_setopt($curl, CURLOPT_URL, $hubUrl);
    $image = curl_exec($curl);

    if ($image === false) {
        $command->error('Error: '.curl_error($curl));

        // Nothing was stored, so do not hand back a path to a missing file.
        return '';
    }

    // A completed transfer can still be an error page (e.g. a 404 body); do not save that as an image.
    $status = (int) curl_getinfo($curl, CURLINFO_RESPONSE_CODE);
    if ($status < 200 || $status >= 300) {
        $command->error("Error: unexpected HTTP status $status");

        return '';
    }

    Storage::disk('public')->put($localPath, $image);
    $command->info('Done.');

    // Return the path to the saved thumbnail.
    return "/storage/$localPath";
}
2024-05-22 01:00:37 -04:00
/**
 * Load the description of each file version from the WoltLab
 * `filebase1_file_version_content` table, reading 100 rows at a time.
 *
 * @return array Rows as associative arrays (`versionID`, `description`), keyed by versionID.
 */
protected function getFileVersionContent(): array
{
    $content = [];

    // Fetch select data from the `filebase1_file_version_content` table.
    DB::connection('mysql_woltlab')
        ->table('filebase1_file_version_content')
        ->select(['versionID', 'description'])
        ->orderBy('versionID', 'desc')
        ->chunk(100, function ($contents) use (&$content) {
            foreach ($contents as $contentItem) {
                // Store the row itself, not the accumulator. It is cast to an array
                // because importModVersions() reads it as $versionContent['description'].
                $content[$contentItem->versionID] = (array) $contentItem;
            }
        });

    return $content;
}
/**
 * Load the label assignments from the WoltLab `wcf1_label_object` table for
 * objectTypeID 387, reading 100 rows at a time.
 *
 * @return array Map of objectID => labelID; a later row for the same objectID replaces an earlier one.
 */
protected function getFileVersionLabels(): array
{
    $labelByObject = [];

    $query = DB::connection('mysql_woltlab')
        ->table('wcf1_label_object')
        ->select(['labelID', 'objectID'])
        ->where('objectTypeID', 387)
        ->orderBy('labelID', 'desc');

    $query->chunk(100, function ($rows) use (&$labelByObject) {
        foreach ($rows as $row) {
            $labelByObject[$row->objectID] = $row->labelID;
        }
    });

    return $labelByObject;
}
/**
 * Copy mod versions from the WoltLab `filebase1_file_version` table into the local
 * mod versions table, upserting on `hub_id` in chunks of 500 rows.
 *
 * A version is skipped when its file has no entry in the preloaded version-label map
 * or when no local mod exists for its file.
 */
protected function importModVersions(): void
{
    $totalInserted = 0;

    DB::connection('mysql_woltlab')->table('filebase1_file_version')->chunkById(500, function (Collection $versions) use (&$totalInserted) {
        $insertData = [];

        foreach ($versions as $version) {
            // Check the in-memory label first so skipped rows cost no database query.
            $versionLabel = $this->fileVersionLabels[$version->fileID] ?? null;
            if (empty($versionLabel)) {
                continue;
            }

            $modId = Mod::whereHubId($version->fileID)->value('id');
            if (empty($modId)) {
                continue;
            }

            $versionContent = $this->fileVersionContent[$version->versionID] ?? [];
            $modOptions = $this->fileOptionValues[$version->fileID] ?? [];

            $insertData[] = [
                'hub_id' => $version->versionID,
                'mod_id' => $modId,
                'version' => $version->versionNumber,
                'description' => $this->convertModDescription($versionContent['description'] ?? ''),
                'link' => $version->downloadURL,
                'spt_version_id' => SptVersion::whereHubId($versionLabel)->value('id'),
                'virus_total_link' => $this->fetchVirusTotalLink($modOptions),
                'downloads' => max((int) $version->downloads, 0), // Ensure the value is at least 0
                'disabled' => (bool) $version->isDisabled,
                'created_at' => Carbon::parse($version->uploadTime, 'UTC'),
                'updated_at' => Carbon::parse($version->uploadTime, 'UTC'),
            ];
        }

        if ($insertData === []) {
            return;
        }

        // `disabled` is listed so that re-running the import refreshes it for existing
        // rows, matching what importMods() does for mods.
        ModVersion::upsert($insertData, ['hub_id'], [
            'mod_id',
            'version',
            'description',
            'link',
            'spt_version_id',
            'virus_total_link',
            'downloads',
            'disabled',
            'created_at',
            'updated_at',
        ]);

        $totalInserted += count($insertData);
        $this->line('Processed '.count($insertData).' mod versions. Total processed so far: '.$totalInserted);
    }, 'versionID');

    $this->info('Total mod versions processed: '.$totalInserted);
}
/**
 * Find the Virus Total link among a file's option rows.
 *
 * The link is stored in `optionValue` of the row with `optionID` 6 or 2. Option 6
 * takes precedence over option 2 regardless of the order of the rows; rows with an
 * empty value are ignored.
 *
 * @param  array  $options  Option rows exposing `optionID` and `optionValue`.
 * @return string The link, or an empty string when neither option has a value.
 */
protected function fetchVirusTotalLink(array $options): string
{
    $fallback = '';

    foreach ($options as $option) {
        if (empty($option->optionValue)) {
            continue;
        }

        if ($option->optionID == 6) {
            return (string) $option->optionValue;
        }

        // Remember the first option 2 value, but keep scanning for an option 6.
        if ($option->optionID == 2 && $fallback === '') {
            $fallback = (string) $option->optionValue;
        }
    }

    return $fallback;
}
2024-06-01 23:04:06 -04:00
/**
 * Convert an HTML description to Markdown.
 *
 * The HTML is passed through Purify::clean() first, and the cleaned markup is then
 * handed to a fresh HtmlConverter.
 *
 * @param  string  $description  The HTML description.
 * @return string The converter's output for the cleaned HTML.
 */
protected function convertModDescription(string $description): string
{
    $cleanHtml = Purify::clean($description);

    return (new HtmlConverter())->convert($cleanHtml);
}
2024-05-20 02:19:02 -04:00
}