[vcpkg] Improve versioning files generators (#15172)

* Remove port version splitting from x-history

* Parallelize versions file generator

* Use cpu_count()/2 to avoid crashes

* Use generatePortVersionsDb.py output to generate baseline

* Update scripts/generateBaseline.py

Co-authored-by: Adam Johnson <AdamJohnso@gmail.com>

* rename generateBaseline function

* Update toolsrc/src/vcpkg/commands.porthistory.cpp

Co-authored-by: ras0219 <533828+ras0219@users.noreply.github.com>

* Remove unused code

Co-authored-by: Adam Johnson <AdamJohnso@gmail.com>
Co-authored-by: ras0219 <533828+ras0219@users.noreply.github.com>
This commit is contained in:
Victor Romero 2021-01-05 14:36:46 -08:00 committed by GitHub
parent 378ffbb940
commit 6d3d6490eb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 135 additions and 158 deletions

View File

@ -1,50 +1,71 @@
import os
import json
import subprocess
import sys
import json
import time
from pathlib import Path
# Absolute path of the directory that contains this script; the paths below
# are built relative to it.
SCRIPT_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
# The `ports` directory, one level above the script directory.
PORTS_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../ports')
# The `port_versions` directory, one level above the script directory.
VERSIONS_DB_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../port_versions')
def _baseline_entry(latest_version):
    """Build the baseline entry for one port from its newest version record.

    The first scheme key present, checked in the order `version`,
    `version-date`, `version-semver`, is copied; if none is present the
    record must carry `version-string`.

    Raises:
        KeyError: if the record has no version key or no `port-version`.
    """
    for scheme in ('version', 'version-date', 'version-semver'):
        if scheme in latest_version:
            entry = {scheme: latest_version[scheme]}
            break
    else:
        entry = {'version-string': latest_version['version-string']}
    entry['port-version'] = latest_version['port-version']
    return entry


def generate_baseline():
    """Write `baseline.json` from the per-port version files.

    Every directory under PORTS_DIRECTORY is treated as a port. For each
    port, `<VERSIONS_DB_DIRECTORY>/<first letter>-/<port>.json` is read and
    the first element of its `versions` list becomes the port's baseline
    entry. Ports without a version file, or whose file is not valid JSON,
    are reported on stderr and skipped. The result is written to
    `<VERSIONS_DB_DIRECTORY>/baseline.json` under the `default` key.
    """
    start_time = time.time()

    # Assume each directory in ${VCPKG_ROOT}/ports is a different port
    port_names = sorted(
        item for item in os.listdir(PORTS_DIRECTORY)
        if os.path.isdir(os.path.join(PORTS_DIRECTORY, item)))

    baseline_entries = {}
    total_count = len(port_names)
    for i, port_name in enumerate(port_names, 1):
        port_file_path = os.path.join(
            VERSIONS_DB_DIRECTORY, f'{port_name[0]}-', f'{port_name}.json')

        if not os.path.exists(port_file_path):
            print(
                f'Error: No version file for {port_name}.\n', file=sys.stderr)
            continue

        # '\r' rewinds the cursor so the progress counter updates in place.
        sys.stderr.write(
            f'\rProcessed {i}/{total_count} ({i/total_count:.2%})')
        with open(port_file_path, 'r') as db_file:
            try:
                versions_object = json.load(db_file)
            except json.JSONDecodeError as e:
                print(f'Error: Decoding {port_file_path}\n{e}\n',
                      file=sys.stderr)
                continue

        # A port with an empty `versions` list gets no baseline entry.
        if versions_object['versions']:
            baseline_entries[port_name] = _baseline_entry(
                versions_object['versions'][0])

    baseline_object = {'default': baseline_entries}

    os.makedirs(VERSIONS_DB_DIRECTORY, exist_ok=True)
    baseline_path = os.path.join(VERSIONS_DB_DIRECTORY, 'baseline.json')
    with open(baseline_path, 'w') as baseline_file:
        json.dump(baseline_object, baseline_file)

    elapsed_time = time.time() - start_time
    print(f'\nElapsed time: {elapsed_time:.2f} seconds')
def main():
    """Generate the baseline, provided the versions database already exists.

    Exits with status 1 when VERSIONS_DB_DIRECTORY is missing: without the
    per-port version files there is nothing to build a baseline from.
    """
    if not os.path.exists(VERSIONS_DB_DIRECTORY):
        print('Version DB files must exist before generating a baseline.\n'
              'Run: `python generatePortVersionsDb.py`\n', file=sys.stderr)
        sys.exit(1)
    generate_baseline()


if __name__ == "__main__":
    main()

View File

@ -1,17 +1,19 @@
import os
import os.path
import sys
import subprocess
import json
import time
import shutil
from subprocess import CalledProcessError
from json.decoder import JSONDecodeError
import multiprocessing
from pathlib import Path
# CPU count as reported by multiprocessing.cpu_count().
MAX_PROCESSES = multiprocessing.cpu_count()
# Absolute path of the directory that contains this script; the paths below
# are built relative to it.
SCRIPT_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
# The `ports` directory, one level above the script directory.
PORTS_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../ports')
# The `port_versions` directory, one level above the script directory.
VERSIONS_DB_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../port_versions')
def get_current_git_ref():
@ -24,91 +26,62 @@ def get_current_git_ref():
return None
def generate_port_versions_file(port_name):
    """Create the versions file for one port unless it already exists.

    Runs `vcpkg x-history <port> --x-json --output=<file>` and lets vcpkg
    write `<VERSIONS_DB_DIRECTORY>/<first letter>-/<port>.json`. A non-zero
    exit status is reported on stderr; nothing is raised, so one failing
    port does not stop the rest of the run.
    """
    containing_dir = os.path.join(VERSIONS_DB_DIRECTORY, f'{port_name[0]}-')
    os.makedirs(containing_dir, exist_ok=True)

    output_file_path = os.path.join(containing_dir, f'{port_name}.json')
    if os.path.exists(output_file_path):
        return

    env = os.environ.copy()
    # NOTE(review): presumably set so that concurrent vcpkg/git invocations
    # do not contend on optional repository locks -- confirm.
    env['GIT_OPTIONAL_LOCKS'] = '0'

    # The executable only carries the `.exe` suffix on Windows.
    vcpkg_name = 'vcpkg.exe' if os.name == 'nt' else 'vcpkg'
    output = subprocess.run(
        [os.path.join(SCRIPT_DIRECTORY, f'../{vcpkg_name}'),
         'x-history', port_name, '--x-json', f'--output={output_file_path}'],
        capture_output=True, encoding='utf-8', env=env)
    if output.returncode != 0:
        print(f'x-history {port_name} failed: ',
              output.stdout.strip(), file=sys.stderr)


def generate_port_versions_db(revision):
    """Generate version files for every port, then record the revision.

    Every directory under PORTS_DIRECTORY is treated as a port and handed to
    `generate_port_versions_file` through a process pool sized to half the
    CPU count (at least one worker). When all ports are processed an empty
    file named after `revision` is created in VERSIONS_DB_DIRECTORY.

    Args:
        revision: name of the marker file to create (the current Git ref).
    """
    start_time = time.time()

    # Assume each directory in ${VCPKG_ROOT}/ports is a different port
    port_names = [item for item in os.listdir(PORTS_DIRECTORY)
                  if os.path.isdir(os.path.join(PORTS_DIRECTORY, item))]
    total_count = len(port_names)

    # Use the same worker count that is announced: half the CPUs, never zero.
    concurrency = max(1, MAX_PROCESSES // 2)
    print(f'Running {concurrency} parallel processes')
    with multiprocessing.Pool(concurrency) as process_pool:
        results = process_pool.imap_unordered(
            generate_port_versions_file, port_names)
        for i, _ in enumerate(results, 1):
            sys.stderr.write(
                f'\rProcessed: {i}/{total_count} ({(i / total_count):.2%})')
        # Shut down cleanly before the context manager terminates the pool.
        process_pool.close()
        process_pool.join()

    # Generate timestamp
    # The directory may not exist yet when there were no ports to process.
    os.makedirs(VERSIONS_DB_DIRECTORY, exist_ok=True)
    rev_file = os.path.join(VERSIONS_DB_DIRECTORY, revision)
    Path(rev_file).touch()

    elapsed_time = time.time() - start_time
    print(
        f'\nElapsed time: {elapsed_time:.2f} seconds')
def main():
    """Generate the versions database for the current Git revision.

    Exits with status 1 when the revision cannot be determined, and with
    status 0 without doing any work when a marker file for that revision
    already exists in VERSIONS_DB_DIRECTORY.
    """
    revision = get_current_git_ref()
    if not revision:
        print("Couldn't fetch current Git revision", file=sys.stderr)
        sys.exit(1)

    marker_path = os.path.join(VERSIONS_DB_DIRECTORY, revision)
    already_generated = os.path.exists(marker_path)
    if already_generated:
        print(f'Database files already exist for commit {revision}')
        sys.exit(0)

    generate_port_versions_db(revision)


if __name__ == '__main__':
    main()

View File

@ -58,44 +58,6 @@ namespace vcpkg::Commands::PortHistory
return std::regex_match(version_string, re);
}
// Splits a trailing "-<number>" off `version_string` and returns it as the port version.
// The input is returned unchanged (paired with `port_version`) when it comes from a manifest or
// already has a positive port version, and unchanged (paired with 0) when it has no '-',
// is recognized by is_date(), or its last fragment cannot be converted by std::stoi.
std::pair<std::string, int> clean_version_string(const std::string& version_string,
                                                 int port_version,
                                                 bool from_manifest)
{
    // Manifest files and ports that use the `Port-Version` field are assumed to have a clean
    // version string already.
    if (from_manifest || port_version > 0)
    {
        return {version_string, port_version};
    }

    const auto dash_pos = version_string.find_last_of('-');
    // Very lazy check to keep date versions untouched
    if (dash_pos == std::string::npos || is_date(version_string))
    {
        return {version_string, 0};
    }

    const auto suffix = version_string.substr(dash_pos + 1);
    int parsed_port_version = 0;
    try
    {
        parsed_port_version = std::stoi(suffix);
    }
    catch (std::exception&)
    {
        // If not convertible to int consider last fragment as part of version string
        return {version_string, 0};
    }

    return {version_string.substr(0, dash_pos), parsed_port_version};
}
vcpkg::Optional<HistoryVersion> get_version_from_text(const std::string& text,
const std::string& git_tree,
const std::string& commit_id,
@ -108,20 +70,17 @@ namespace vcpkg::Commands::PortHistory
{
if (const auto& scf = maybe_scf->get())
{
// TODO: Get clean version name and port version
const auto version_string = scf->core_paragraph->version;
const auto clean_version =
clean_version_string(version_string, scf->core_paragraph->port_version, is_manifest);
// SCF to HistoryVersion
auto version = scf->core_paragraph->version;
auto port_version = scf->core_paragraph->port_version;
return HistoryVersion{
port_name,
git_tree,
commit_id,
commit_date,
Strings::concat(clean_version.first, "#", std::to_string(clean_version.second)),
clean_version.first,
clean_version.second};
Strings::concat(version, "#", port_version),
version,
port_version,
};
}
}
@ -197,29 +156,37 @@ namespace vcpkg::Commands::PortHistory
ret.emplace_back(version);
}
}
// NOTE: Uncomment this code if you're looking for edge cases to patch in the generation.
// Otherwise, x-history simply skips "bad" versions, which is OK behavior.
// else
//{
// Checks::exit_with_message(VCPKG_LINE_INFO, "Failed to get version from %s:%s",
// commit_date_pair.first, port_name);
//}
}
return ret;
}
}
// Name of the setting that directs the command's output to a file.
static constexpr StringLiteral OPTION_OUTPUT_FILE = "output";
// Settings accepted by this command, each paired with its help text.
static const CommandSetting HISTORY_SETTINGS[] = {
{OPTION_OUTPUT_FILE, "Write output to a file"},
};
// Command description handed to parse_arguments() in perform_and_exit().
// NOTE(review): this listing interleaves pre- and post-change lines; the bare `{},` entry
// below appears to be the old options initializer superseded by the HISTORY_SETTINGS line
// that follows it -- confirm against the actual file.
const CommandStructure COMMAND_STRUCTURE = {
create_example_string("history <port>"),
1,
1,
{},
{{}, {HISTORY_SETTINGS}, {}},
nullptr,
};
// Returns the value stored under `key` in `m`, or nullopt when the key is absent.
static Optional<std::string> maybe_lookup(std::unordered_map<std::string, std::string> const& m,
                                          std::string const& key)
{
    const auto found = m.find(key);
    if (found == m.end())
    {
        return nullopt;
    }
    return found->second;
}
void perform_and_exit(const VcpkgCmdArguments& args, const VcpkgPaths& paths)
{
const ParsedArguments options = args.parse_arguments(COMMAND_STRUCTURE);
const ParsedArguments parsed_args = args.parse_arguments(COMMAND_STRUCTURE);
auto maybe_output_file = maybe_lookup(parsed_args.settings, OPTION_OUTPUT_FILE);
std::string port_name = args.command_arguments.at(0);
std::vector<HistoryVersion> versions = read_versions_from_log(paths, port_name);
@ -241,10 +208,26 @@ namespace vcpkg::Commands::PortHistory
root.insert("versions", versions_json);
auto json_string = Json::stringify(root, vcpkg::Json::JsonStyle::with_spaces(2));
System::printf("%s\n", json_string);
if (maybe_output_file.has_value())
{
auto output_file_path = fs::u8path(maybe_output_file.value_or_exit(VCPKG_LINE_INFO));
auto& fs = paths.get_filesystem();
fs.write_contents(output_file_path, json_string, VCPKG_LINE_INFO);
}
else
{
System::printf("%s\n", json_string);
}
}
else
{
if (maybe_output_file.has_value())
{
System::printf(
System::Color::warning, "Warning: Option `--$s` requires `--x-json` switch.", OPTION_OUTPUT_FILE);
}
System::print2(" version date vcpkg commit\n");
for (auto&& version : versions)
{