Compare commits

...

13 commits

15 changed files with 753 additions and 109 deletions

View file

@ -75,25 +75,6 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin)
set(CMAKE_INSTALL_PREFIX $ENV{ACT_HOME} CACHE PATH "installation path" FORCE)
add_subdirectory(src)
# Link the needed libraries into it
target_link_libraries(
${PROJECT_NAME}
act-cluster-lib
actsim-agent-lib
)
# Add the Postgresql library
target_link_libraries(
${PROJECT_NAME}
-lpqxx -lpq
)
# specify install targets
install(
TARGETS actsim-cluster-agent
DESTINATION bin
)
# We don't provide a library
#install(

View file

@ -38,6 +38,7 @@ class DBSimArtifact : public db::DBArtifact {
const db::uuid_t& source_pass,
const db::uuid_t& target_artifact,
const db::uuid_t& design,
const db::uuid_t& reference,
const db::uuid_t& source_config
);
@ -45,6 +46,11 @@ class DBSimArtifact : public db::DBArtifact {
* @brief The UUID of the design this simulation uses
*/
const db::uuid_t& design = design_;
/**
* @brief The UUID of the reference run this simulation uses
*/
const db::uuid_t& reference = reference_;
/**
* @brief The UUID of the simulator configuration
@ -53,6 +59,7 @@ class DBSimArtifact : public db::DBArtifact {
private:
db::uuid_t reference_;
db::uuid_t design_;
db::uuid_t source_config_;
};
@ -65,8 +72,9 @@ class DBSimConfigArtifact : public DBSimArtifact, public pl::SimConfigArtifact {
const db::uuid_t& source_pass,
const db::uuid_t& target_artifact,
const db::uuid_t& design,
const db::uuid_t& reference,
const db::uuid_t& source_config
) : DBSimArtifact(id, source_pass, target_artifact, design,source_config) {};
) : DBSimArtifact(id, source_pass, target_artifact, design, reference, source_config) {};
};
class DBSimOutputArtifact : public DBSimArtifact, public pl::SimOutputArtifact {
public:
@ -76,8 +84,9 @@ class DBSimOutputArtifact : public DBSimArtifact, public pl::SimOutputArtifact {
const db::uuid_t& source_pass,
const db::uuid_t& target_artifact,
const db::uuid_t& design,
const db::uuid_t& reference,
const db::uuid_t& source_config
) : DBSimArtifact(id, source_pass, target_artifact, design,source_config) {};
) : DBSimArtifact(id, source_pass, target_artifact, design, reference, source_config) {};
};

View file

@ -47,6 +47,7 @@ class Downloader {
void thread_run();
bool fetch_tasks(size_t n);
bool fetch_design(const db::uuid_t& id, std::string& design);
std::shared_ptr<pl::SimOutputArtifact> fetch_reference_run(const db::uuid_t& id);
void reopen_task(const db::uuid_t& id, bool halt);
std::unique_ptr<std::thread> downloader_thread;

View file

@ -0,0 +1,71 @@
/*************************************************************************
*
* This file is part of the ACT library
*
* Copyright (c) 2024 Fabian Posch
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
**************************************************************************
*/
#ifndef __LOG_PARSER__
#define __LOG_PARSER__
#include <cluster/artifact.hpp>
#include <cstdint>
#include <vector>
#include "agent_artifact.hpp"
/**
 * @brief Parses actsim simulation output line by line and flags fault
 *        conditions on the wrapped output artifact.
 *
 * Feed stdout lines through parse_log() and stderr lines through
 * parse_error(), then call finalize() once the run has ended. When
 * constructed with a reference run, token timings and counts are
 * compared against it; without one, only the model/DUT token balance
 * can be checked.
 */
class LogParser {
public:
/**
 * @brief Build a parser that compares the run against a reference run.
 * @param artifact Output artifact the parse results are written to.
 * @param reference Reference run used for timing / token-count comparison.
 */
LogParser(std::unique_ptr<DBSimOutputArtifact>& artifact, std::shared_ptr<pl::SimOutputArtifact> reference);
/**
 * @brief Build a parser without a reference run (balance check only).
 * @param artifact Output artifact the parse results are written to.
 */
LogParser(std::unique_ptr<DBSimOutputArtifact>& artifact);
/** @brief Inspect one stdout line for faults and archive it. */
void parse_log(const std::string& line);
/** @brief Archive one stderr line (actsim only emits warnings there). */
void parse_error(const std::string& line);
/** @brief Derive end-of-run fault flags after the whole log was parsed. */
void finalize();
/** @brief True if the log grew past 3x the reference run's size. */
bool check_busy_deadlock();
private:
// iterator over the reference run's output token timings
using timing_it_t = std::vector<uint32_t>::const_iterator;
void check_token_count(const std::string& line);
void check_value_timing_fault(const std::string& line);
void check_coding_fault(const std::string& line);
void check_glitch(const std::string& line);
uint32_t extract_timestamp(const std::string& line);
void handle_output_token(const std::string& line);
// artifact receiving logs, timings and fault flags (owned by the caller)
std::unique_ptr<DBSimOutputArtifact>& artifact;
// reference run to compare against; null when has_reference is false
std::shared_ptr<pl::SimOutputArtifact> reference;
bool has_reference;
// cursor into the reference run's token timings
timing_it_t timing_it;
timing_it_t reference_ott_end;
// set once any fault was detected; decides whether logs are kept
bool failure_mode = false;
// number of tokens the DUT emitted
int dut_output_tokens_ = 0;
// running model-minus-DUT token balance; 0 when both sides agree
int output_token_difference_ = 0;
};
#endif

View file

@ -63,11 +63,22 @@ class TaskInterface {
std::unique_ptr<OutputType> pop_finished(bool& empty);
size_t get_buffer_space();
/*
* Store a design entry locally
*/
bool increment_design(const db::uuid_t& id);
void decrement_design(const db::uuid_t& id);
std::string get_design(const db::uuid_t& id);
void store_design(const db::uuid_t&, std::string& design);
/*
* Store a reference run locally
*/
bool increment_reference(const db::uuid_t& id);
void decrement_reference(const db::uuid_t& id);
std::shared_ptr<pl::SimOutputArtifact> get_reference(const db::uuid_t& id);
void store_reference(const db::uuid_t&, std::shared_ptr<pl::SimOutputArtifact> reference_run);
bool running() { return this->running_.load(std::memory_order_relaxed); };
bool is_stop_immediate() { return this->immediate_stop.load(std::memory_order_relaxed); };
void stop();
@ -87,6 +98,7 @@ class TaskInterface {
volatile std::atomic_bool immediate_stop;
std::unordered_map<db::uuid_t, std::pair<size_t, std::string>> designs;
std::unordered_map<db::uuid_t, std::pair<size_t, std::shared_ptr<pl::SimOutputArtifact>>> references;
////// Mutexes //////
@ -95,6 +107,9 @@ class TaskInterface {
// design map access
std::mutex designs_mutex;
// reference map access
std::mutex references_mutex;
// notify upload thread that the finished queue is ready for cleanup
std::atomic_bool cleanup_ready;

View file

@ -28,6 +28,7 @@
#include <cluster/db_types.hpp>
#include <cluster/db_client.hpp>
#include <memory>
#include <thread>
#include "task_interface.hpp"
@ -44,6 +45,7 @@ class Uploader {
void thread_run();
bool upload_task(std::unique_ptr<OutputType> task);
std::string build_fault_flags(const std::unique_ptr<OutputType>& task);
std::unique_ptr<std::thread> uploader_thread;
std::unique_ptr<db::Connection> conn;

View file

@ -48,8 +48,6 @@ class Worker {
void thread_run();
std::unique_ptr<OutputType> perform_task(std::unique_ptr<InputType>& task, bool& finished);
std::unique_ptr<OutputType> pipe_error(bool& finished);
std::unique_ptr<std::thread> worker_thread;
std::atomic<db::uuid_t> current_task;

View file

@ -17,3 +17,17 @@ add_executable(
${PROJECT_NAME}
${proj_SRC}
)
# Link the needed libraries into it
target_link_libraries(
${PROJECT_NAME}
act-cluster-lib
${actsim_agent_lib}
-lpqxx -lpq
)
# specify install targets
install(
TARGETS actsim-cluster-agent
DESTINATION bin
)

View file

@ -8,14 +8,24 @@ file(
"*.cpp"
)
set(actsim_agent_lib actsim_agent)
set(actsim_agent_lib ${actsim_agent_lib} PARENT_SCOPE)
add_library(
actsim-agent-lib
${actsim_agent_lib}
SHARED
${actsim_agent_SRC}
)
target_link_libraries(
actsim-agent-lib
${actsim_agent_lib}
act-cluster-lib
-lpqxx -lpq -latomic
)
)
# specify install targets
install(
TARGETS ${actsim_agent_lib}
DESTINATION lib
)

View file

@ -30,8 +30,10 @@ DBSimArtifact::DBSimArtifact(
const db::uuid_t& source_pass,
const db::uuid_t& target_artifact,
const db::uuid_t& design,
const db::uuid_t& reference,
const db::uuid_t& source_config
) : DBArtifact(id, source_pass, target_artifact) {
this->design_ = design;
this->reference_ = reference;
this->source_config_ = source_config;
}

View file

@ -23,13 +23,16 @@
**************************************************************************
*/
#include <cstddef>
#include <cstdint>
#include <string>
#include <cluster/artifact.hpp>
#include <filesystem>
#include <cstdio>
#include <cstdlib>
#include <pqxx/pqxx>
#include <functional>
#include <chrono>
#include <vector>
#include "util.h"
#include "downloader.hpp"
@ -72,7 +75,7 @@ void Downloader::thread_run() {
// if the download buffer is not full, fetch some more tasks
if (!this->fetch_tasks(this->interface.get_buffer_space())) {
// we can sleep for a certain amount of time, nothing to do
DEBUG_PRINT("Going to sleep. Checking for more tasks in a bit...");
// DEBUG_PRINT("Going to sleep. Checking for more tasks in a bit...");
std::this_thread::sleep_for(NOTHING_AVAILABLE_SLEEP_TIME);
}
@ -102,13 +105,14 @@ bool Downloader::fetch_tasks(size_t n) {
for (size_t i = 0; i < n; ++i) {
// fetch a new task from the database
auto fetch_task_lambda = [](
auto fetch_task_lambda = [n](
pqxx::work *txn,
bool *task_avail,
pl::testcase_t *testcase,
db::uuid_t *target_artifact,
db::uuid_t *source_pass,
db::uuid_t *design,
db::uuid_t *reference,
db::uuid_t *source_config,
db::uuid_t *id
) {
@ -118,7 +122,7 @@ bool Downloader::fetch_tasks(size_t n) {
// 2.) Passes that are already in progress are preferred
// 3.) New passes are started in the order they were added to the database
// 4.) Passes are only started if all their dependencies are fulfilled
auto res = txn->exec(
auto res = txn->exec_params(
"SELECT "
" ap.design_file AS design, "
" ap.top_proc, "
@ -126,6 +130,7 @@ bool Downloader::fetch_tasks(size_t n) {
" ap.id AS source_pass, "
" sc.id AS source_config, "
" sc.sim_commands, "
" sc.has_reference AS reference, "
" so.id AS id "
"FROM "
" actsim_passes ap "
@ -148,10 +153,19 @@ bool Downloader::fetch_tasks(size_t n) {
" AND dep.pass_status != 'finished' "
" ) "
" ) "
" AND (sc.has_reference IS NULL OR "
" EXISTS ("
" SELECT 1 "
" FROM sim_outputs out "
" WHERE out.sim_config = sc.has_reference "
" AND out.part_status = 'finished' "
" ) "
" ) "
"ORDER BY "
" ap.pass_status = 'in_progress' DESC, "
" j.time_added ASC "
"LIMIT 1;"
"LIMIT $1;",
n
);
// seems like there is nothing to do right now
@ -167,17 +181,15 @@ bool Downloader::fetch_tasks(size_t n) {
*target_artifact = row["target_artifact"].as<db::uuid_t>();
*source_pass = row["source_pass"].as<db::uuid_t>();
*design = row["design"].as<db::uuid_t>();
if (!row["reference"].is_null()) {
*reference = row["reference"].as<db::uuid_t>();
} else {
*reference = db::uuid_t();
}
*source_config = row["source_config"].as<db::uuid_t>();
std::vector<std::string> commands;
auto arr = row["sim_commands"].as_array();
std::pair<pqxx::array_parser::juncture, std::string> elem;
do {
elem = arr.get_next();
if (elem.first == pqxx::array_parser::juncture::string_value) {
commands.push_back(elem.second);
}
} while (elem.first != pqxx::array_parser::juncture::done);
auto com_arr = row["sim_commands"].as_sql_array<std::string>();
std::vector<std::string> commands(com_arr.cbegin(), com_arr.cend());
*testcase = {
commands,
@ -211,13 +223,14 @@ bool Downloader::fetch_tasks(size_t n) {
};
std::function<void(pqxx::work*, bool*, pl::testcase_t*, db::uuid_t*, db::uuid_t*, db::uuid_t*, db::uuid_t*, db::uuid_t*)> fetch_task_func = fetch_task_lambda;
std::function<void(pqxx::work*, bool*, pl::testcase_t*, db::uuid_t*, db::uuid_t*, db::uuid_t*, db::uuid_t*, db::uuid_t*, db::uuid_t*)> fetch_task_func = fetch_task_lambda;
bool task_avail;
pl::testcase_t testcase;
db::uuid_t target_artifact;
db::uuid_t source_pass;
db::uuid_t design;
db::uuid_t reference;
db::uuid_t source_config;
db::uuid_t id;
@ -228,6 +241,7 @@ bool Downloader::fetch_tasks(size_t n) {
&target_artifact,
&source_pass,
&design,
&reference,
&source_config,
&id
)) {
@ -246,7 +260,7 @@ bool Downloader::fetch_tasks(size_t n) {
DEBUG_PRINT("Fetched task with id " + db::to_string(id) + ", stemming from pass " + db::to_string(source_pass) + ", outputting to artifact " + db::to_string(target_artifact));
DEBUG_PRINT("Design used is " + db::to_string(design) + ", simulation config " + db::to_string(source_config));
auto task = std::make_unique<DBSimConfigArtifact>(id, source_pass, target_artifact, design, source_config);
auto task = std::make_unique<DBSimConfigArtifact>(id, source_pass, target_artifact, design, reference, source_config);
task->add_testcase(testcase);
// see if we already have the design locally; if not, load it
@ -265,6 +279,26 @@ bool Downloader::fetch_tasks(size_t n) {
this->interface.store_design(design, design_path);
}
// if we have a reference for this run, we have to see if it is loaded
if (reference != 0) {
// see if we already have the reference run locally; if not, load it
if (!this->interface.increment_reference(reference)) {
DEBUG_PRINT("Fetching new reference run with ID " + db::to_string(reference));
std::shared_ptr<pl::SimOutputArtifact> reference_run;
// if we could not load the reference run, reopen the task in the database
if ((reference_run = this->fetch_reference_run(reference)) == nullptr) {
std::cerr << "Error: Could not load reference run for task " << task->id << ", reopening it." << std::endl;
this->reopen_task(task->id, true);
continue;
}
this->interface.store_reference(reference, reference_run);
}
}
// push the task to the list of open tasks
this->interface.push_fresh(std::move(task));
@ -274,7 +308,6 @@ bool Downloader::fetch_tasks(size_t n) {
}
bool Downloader::fetch_design(const db::uuid_t& id, std::string& design) {
design = "test design";
DEBUG_PRINT("Loading design with ID " + db::to_string(id) + " from database.");
@ -336,15 +369,70 @@ bool Downloader::fetch_design(const db::uuid_t& id, std::string& design) {
return true;
}
void Downloader::reopen_task(const db::uuid_t& id, bool halt) {
DEBUG_PRINT("Reopening task with ID " + db::to_string(id));
std::shared_ptr<pl::SimOutputArtifact> Downloader::fetch_reference_run(const db::uuid_t& id) {
// open up the status of this partial output in the database again
auto task_reopen_lambda = [](pqxx::work *txn, const db::uuid_t *task, db::JobStatusType *status) {
txn->exec_params0("UPDATE sim_outputs SET part_status = $2 WHERE id = $1 AND part_status = 'in_progress';", *task, *status);
DEBUG_PRINT("Loading reference run with ID " + db::to_string(id) + " from database.");
auto fetch_design_lambda = [id](
pqxx::work *txn,
std::vector<uint32_t> *output_token_timings,
long *output_tokens,
long *log_size,
bool *found
) {
try {
auto res = txn->exec_params1("SELECT output_tokens, output_token_timings, log_size FROM sim_outputs WHERE sim_config = $1;", id);
// load the output token timings
auto arr_ott = res["output_token_timings"].as_sql_array<uint32_t>();
*output_token_timings = std::vector<uint32_t>(arr_ott.cbegin(), arr_ott.cend());
*output_tokens = res["output_tokens"].as<long>();
*log_size = res["log_size"].as<long>();
*found = true;
} catch (pqxx::unexpected_rows& e) {
std::cerr << "Error: Failed to fetch reference run " << id << ": " << e.what() << std::endl;
*found = false;
}
};
std::function<void(pqxx::work*, const db::uuid_t*, db::JobStatusType*)> task_reopen_func = task_reopen_lambda;
std::function<void(
pqxx::work *txn,
std::vector<uint32_t> *output_token_timings,
long *output_tokens,
long *log_size,
bool *found
)> fetch_design_func = fetch_design_lambda;
std::vector<uint32_t> output_token_timings;
long output_tokens;
long log_size;
bool ref_run_found;
if (!this->conn->send_request(&fetch_design_func, &output_token_timings, &output_tokens, &log_size, &ref_run_found)) {
// if we lost connection, there's nothing else we can really do
std::cerr << "Error: Lost connection while trying fetch a design! Aborting!" << std::endl;
this->interface.stop();
this->interface.stop_immediately();
return nullptr;
}
if (!ref_run_found) {
return nullptr;
}
auto reference = std::make_shared<pl::SimOutputArtifact>();
reference->set_output_token_timings(output_token_timings);
reference->set_output_tokens(output_tokens);
reference->set_size(log_size);
return reference;
}
void Downloader::reopen_task(const db::uuid_t& id, bool halt) {
DEBUG_PRINT("Reopening task with ID " + db::to_string(id));
db::JobStatusType status;
@ -355,7 +443,14 @@ void Downloader::reopen_task(const db::uuid_t& id, bool halt) {
status = db::JobStatusType::NOT_STARTED;
}
if (!this->conn->send_request(&task_reopen_func, &id, &status)) {
// open up the status of this partial output in the database again
auto task_reopen_lambda = [id, status](pqxx::work *txn) {
txn->exec_params0("UPDATE sim_outputs SET part_status = $2 WHERE id = $1 AND part_status = 'in_progress';", id, status);
};
std::function<void(pqxx::work*)> task_reopen_func = task_reopen_lambda;
if (!this->conn->send_request(&task_reopen_func)) {
// if we lost connection, there's nothing else we can really do
std::cerr << "Error: Lost connection while trying to reopen task " << id << "! Database might be compromised! Aborting!" << std::endl;
this->interface.stop();

View file

@ -0,0 +1,269 @@
/*************************************************************************
*
* This file is part of the ACT library
*
* Copyright (c) 2024 Fabian Posch
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
**************************************************************************
*/
#include <cstdint>
#include <regex>
#include <stdexcept>
#include <string>
#include <vector>
#include "util.h"
#include "log_parser.hpp"
/**
 * @brief Construct a parser that validates the run against a reference.
 * @param artifact Output artifact the parse results are written to.
 * @param reference Reference run used for timing / token-count comparison.
 */
LogParser::LogParser(
    std::unique_ptr<DBSimOutputArtifact>& artifact,
    std::shared_ptr<pl::SimOutputArtifact> reference
) : artifact(artifact),
    reference(reference),
    has_reference(true),
    // cache the reference run's token timing range for later comparison
    // NOTE(review): assumes get_output_token_timings() returns a reference
    // to storage owned by the artifact -- confirm, else these dangle
    timing_it(reference->get_output_token_timings().begin()),
    reference_ott_end(reference->get_output_token_timings().end()) {}
/**
 * @brief Construct a parser without a reference run.
 * @param artifact Output artifact the parse results are written to.
 */
LogParser::LogParser(std::unique_ptr<DBSimOutputArtifact>& artifact)
    : artifact(artifact), has_reference(false) {}
/**
 * @brief Run all fault detectors over one stdout line, then archive it.
 * @param line The raw simulator output line.
 */
void LogParser::parse_log(const std::string& line) {
    // keep the detector order: the token balance must be updated before
    // the value/timing check may consume the line as an output token
    this->check_token_count(line);
    this->check_coding_fault(line);
    this->check_value_timing_fault(line);
    this->check_glitch(line);
    // archive the raw line on the artifact
    this->artifact->add_log_output(line);
}
/**
 * @brief Archive one stderr line.
 *
 * actsim writes nearly everything to stdout; stderr only carries the
 * startup warnings, so no fault detection is needed here.
 * @param line The raw simulator error line.
 */
void LogParser::parse_error(const std::string& line) {
    this->artifact->add_err_output(line);
}
/**
 * @brief Derive end-of-run fault flags once the whole log was parsed.
 *
 * With a reference run: fewer total tokens than the reference implies a
 * deadlock; a non-zero model/DUT balance implies a token-count fault; a
 * clean run has its logs cleared to save upload space. Without a
 * reference run only the token balance can be checked.
 */
void LogParser::finalize() {
/*
So the only way to do this cleanly is to make sure that either
- the model has not sent all tokens yet -> deadlock
- or the model has sent everything but there is one missing -> token fault
This has the consequence that we cannot inject close to the end of the test!
This also means that we don't really know if a deadlock occurred without
a reference run to go off.
Only that there was a potential token count difference.
*/
if (has_reference) {
// model has not sent all tokens yet
if ((dut_output_tokens_ + output_token_difference_) < reference->output_tokens) {
// a deadlock must have occurred
artifact->set_fault_deadlock(true);
failure_mode = true;
DEBUG_PRINT("Deadlock detected during finalization (compared to reference)");
DEBUG_PRINT("Reference had " +
std::to_string(reference->output_tokens) +
" tokens, task had " +
std::to_string(dut_output_tokens_) +
" + " + std::to_string(output_token_difference_));
// model has sent all tokens but DUT has not sent all of them
} else if (output_token_difference_ != 0) {
// a token amount error has occurred
artifact->set_fault_token_count(true);
failure_mode = true;
DEBUG_PRINT("Token count mismatch detected during finalization (compared to reference)");
}
// if there is no failure condition,
// we don't need to save the log
if (!failure_mode) {
artifact->clear_logs();
}
} else {
// if token difference != 0: token count error
if (output_token_difference_ != 0) {
artifact->set_fault_token_count(true);
// NOTE(review): output_tokens is persisted only on a mismatch in this
// branch -- confirm that clean (no-reference) runs store it elsewhere
artifact->set_output_tokens(dut_output_tokens_);
DEBUG_PRINT("Token count mismatch detected during finalization.");
}
}
}
/**
 * @brief Update the model/DUT token balance from one log line.
 *
 * The running difference must return to zero by the end of the log if
 * both the model and the DUT emitted the same number of tokens.
 * @param line The log line to scan.
 */
inline void LogParser::check_token_count(const std::string& line) {
    const auto contains = [&line](const char* token) {
        return line.find(token) != std::string::npos;
    };
    if (contains("Model response received")) {
        ++output_token_difference_;
    }
    if (contains("DUT response received")) {
        --output_token_difference_;
        ++dut_output_tokens_;
    }
}
/**
 * @brief Detect scoreboard verdict lines and process them as output tokens.
 *
 * A "TEST FAILED" verdict marks a value fault; either verdict marks an
 * output token whose timing still has to be validated.
 * @param line The log line to scan.
 */
inline void LogParser::check_value_timing_fault(const std::string& line) {
    if (line.find("TEST FAILED") != std::string::npos) {
        // wrong value observed on an output token
        artifact->set_fault_value(true);
        failure_mode = true;
        DEBUG_PRINT("Value error detected");
        handle_output_token(line);
    } else if (line.find("TEST SUCCESS") != std::string::npos) {
        // value was fine; the token's timing still needs checking
        handle_output_token(line);
    }
}
/**
 * @brief Record an output token's timestamp and compare it to the reference.
 * @param line The verdict line carrying the token's timestamp.
 */
inline void LogParser::handle_output_token(const std::string& line) {
    // always record the token's timing on the artifact
    const auto stamp = extract_timestamp(line);
    artifact->add_output_token_timing(stamp);
    // without a reference run there is nothing to compare against
    if (!has_reference) {
        return;
    }
    // ran out of reference tokens -> the run produced too many
    if (timing_it == reference_ott_end) {
        artifact->set_fault_token_count(true);
        failure_mode = true;
        DEBUG_PRINT("Tried to compare token timing but no reference token left.");
        return;
    }
    // compare against the reference timing, then advance the cursor
    if (stamp != *timing_it) {
        artifact->set_fault_timing_deviation(true);
        failure_mode = true;
        DEBUG_PRINT("Token timing does not line up.");
    }
    ++timing_it;
}
/**
 * @brief Detect actsim's exclusive-high constraint violation warning.
 * @param line The log line to scan.
 */
inline void LogParser::check_coding_fault(const std::string& line) {
    if (line.find("WARNING: excl-hi constraint in") == std::string::npos) {
        return;
    }
    artifact->set_fault_coding(true);
    failure_mode = true;
    DEBUG_PRINT("Excl-hi constraint violated");
}
/**
 * @brief Check whether the run is busy-deadlocked.
 *
 * A run is considered busy-deadlocked once its log grows beyond three
 * times the reference run's size. Without a reference run this cannot
 * be decided at all.
 * @return true if the size threshold was exceeded.
 */
bool LogParser::check_busy_deadlock() {
    if (!has_reference) {
        return false;
    }
    const auto limit = reference->get_size() * 3;
    if (artifact->get_size() <= limit) {
        return false;
    }
    failure_mode = true;
    DEBUG_PRINT("Busy deadlock detected, reference size is " +
        std::to_string(reference->get_size()) +
        ", ours is " + std::to_string(artifact->get_size())
    );
    return true;
}
/**
 * @brief Detect the standard glitch report on an interface channel.
 * @param line The log line to scan.
 */
inline void LogParser::check_glitch(const std::string& line) {
    if (line.find("Glitch detected in channel") == std::string::npos) {
        return;
    }
    artifact->set_fault_glitch(true);
    failure_mode = true;
    DEBUG_PRINT("Glitch in interface detected");
}
/**
 * @brief Extract the leading "[ <ticks> ]" timestamp from a log line.
 * @param line The log line to scan.
 * @return The parsed timestamp, or 0 if the line has none or the digit
 *         run does not fit the numeric type.
 */
inline uint32_t LogParser::extract_timestamp(const std::string& line) {
    // compile the pattern once -- std::regex construction is expensive
    // and this runs on every output token
    static const std::regex pattern(R"(^\[\s*(\d+)\s*\])");
    std::smatch match;
    if (!std::regex_search(line, match, pattern)) {
        return 0;
    }
    try {
        // stoul, not stoi: the return type is uint32_t, and stoi throws
        // std::out_of_range for timestamps above INT_MAX
        return static_cast<uint32_t>(std::stoul(match[1].str()));
    } catch (const std::out_of_range&) {
        // a digit run too large even for unsigned long cannot be a sane
        // timestamp; treat it as "no timestamp" rather than crash
        return 0;
    }
}

View file

@ -23,6 +23,7 @@
**************************************************************************
*/
#include "util.h"
#include <cstdio>
#include "task_interface.hpp"
@ -41,6 +42,8 @@ TaskInterface::~TaskInterface() {
void TaskInterface::push_fresh(std::unique_ptr<InputType> task) {
DEBUG_PRINT("New task in the queue!");
// lock the queue and insert into it
std::lock_guard<std::mutex> lock(this->fresh_queue_mutex);
this->fresh_queue.push(std::move(task));
@ -86,6 +89,8 @@ void TaskInterface::wait_for_fresh() {
// we will be notified either when there is new data or the program has been stopped
this->fresh_queue_empty_condition.wait(lock, [&] { return !this->fresh_queue_empty() || !running(); });
DEBUG_PRINT("Worker released from block");
}
void TaskInterface::wait_for_finished() {
@ -165,16 +170,16 @@ void TaskInterface::decrement_design(const db::uuid_t& id) {
// if the reference counter hit 0, erase the design entry from the list
// of available designs
if (design_entry.first == 0) {
DEBUG_PRINT("Reference counter has hit 0. Deleting temp file from disk...");
// if (design_entry.first == 0) {
// DEBUG_PRINT("Reference counter has hit 0. Deleting temp file from disk...");
// delete the temporary file from disk
DEBUG_PRINT("Deleting design file from disk.");
std::remove(design_entry.second.c_str());
// // delete the temporary file from disk
// DEBUG_PRINT("Deleting design file from disk.");
// std::remove(design_entry.second.c_str());
DEBUG_PRINT("Erasing design from store.");
this->designs.erase(id);
}
// DEBUG_PRINT("Erasing design from store.");
// this->designs.erase(id);
// }
}
std::string TaskInterface::get_design(const db::uuid_t& id) {
@ -192,7 +197,7 @@ std::string TaskInterface::get_design(const db::uuid_t& id) {
void TaskInterface::store_design(const db::uuid_t& id, std::string& design) {
std::lock_guard<std::mutex> lock (this->designs_mutex);
DEBUG_PRINT("Henlo Storing new design with ID " + db::to_string(id));
DEBUG_PRINT("Storing new design with ID " + db::to_string(id));
// make sure the design isn't already in the list of design entries
// if it is, just increment its reference counter
@ -206,6 +211,80 @@ void TaskInterface::store_design(const db::uuid_t& id, std::string& design) {
this->designs[id] = {1, design};
}
/**
 * @brief Increment the usage counter of a locally cached reference run.
 * @param id UUID of the reference run.
 * @return true if the reference run is cached (counter incremented),
 *         false if it is not present and has to be fetched first.
 */
bool TaskInterface::increment_reference(const db::uuid_t& id) {
    std::lock_guard<std::mutex> lock (this->references_mutex);
    DEBUG_PRINT("Looking for reference run with ID " + db::to_string(id));
    // single map lookup instead of find() followed by operator[]
    auto entry = this->references.find(id);
    if (entry == this->references.end()) {
        DEBUG_PRINT("Reference run not found.");
        return false;
    }
    auto& counter = entry->second.first;
    ++counter;
    DEBUG_PRINT("Reference run found. Incrementing reference counter. New counter is " + std::to_string(counter));
    return true;
}
/**
 * @brief Decrement the usage counter of a locally cached reference run.
 *
 * Missing entries are tolerated (logged, no-op). Entries are NOT evicted
 * when the counter reaches zero -- mirroring decrement_design, they are
 * kept cached for reuse by later tasks.
 * @param id UUID of the reference run.
 */
void TaskInterface::decrement_reference(const db::uuid_t& id) {
    std::lock_guard<std::mutex> lock (this->references_mutex);
    DEBUG_PRINT("Looking to decrement reference run with ID " + db::to_string(id));
    // single map lookup instead of find() followed by operator[]
    auto entry = this->references.find(id);
    if (entry == this->references.end()) {
        DEBUG_PRINT("Could not find reference run. Not decrementing.");
        return;
    }
    auto& counter = entry->second.first;
    --counter;
    DEBUG_PRINT("Reference run found. Decrementing reference counter. New counter is " + std::to_string(counter));
    // eviction at counter == 0 is deliberately disabled
    // if (counter == 0) {
    //     DEBUG_PRINT("Reference counter has hit 0. Erasing reference run from map...");
    //     this->references.erase(entry);
    // }
}
/**
 * @brief Fetch a locally cached reference run.
 * @param id UUID of the reference run.
 * @return The cached reference run, or a fresh empty artifact (never
 *         null) if the entry vanished -- which should never happen.
 */
std::shared_ptr<pl::SimOutputArtifact> TaskInterface::get_reference(const db::uuid_t& id) {
    std::lock_guard<std::mutex> lock (this->references_mutex);
    // single map lookup instead of find() followed by operator[]
    auto entry = this->references.find(id);
    if (entry == this->references.end()) {
        std::cerr << "Error: Reference run was somehow deleted before it could reach the execution stage. This should really never happen!" << std::endl;
        // hand back an empty artifact so the caller never dereferences null
        return std::make_shared<pl::SimOutputArtifact>();
    }
    return entry->second.second;
}
/**
 * @brief Cache a reference run locally with an initial usage count of 1.
 *
 * If the run is already cached, its usage counter is incremented instead.
 * @param id UUID of the reference run.
 * @param reference The reference run artifact to cache.
 */
void TaskInterface::store_reference(const db::uuid_t& id, std::shared_ptr<pl::SimOutputArtifact> reference) {
    std::lock_guard<std::mutex> lock (this->references_mutex);
    DEBUG_PRINT("Storing new reference with ID " + db::to_string(id));
    // single map lookup instead of find() followed by operator[]
    auto entry = this->references.find(id);
    if (entry != this->references.end()) {
        DEBUG_PRINT("Reference run is already in here, incrementing reference counter instead.");
        ++entry->second.first;
        return;
    }
    // otherwise, create a new entry with a usage count of 1
    this->references.emplace(id, std::make_pair(static_cast<size_t>(1), reference));
}
size_t TaskInterface::get_buffer_space() {
std::lock_guard<std::mutex> lock(this->fresh_queue_mutex);
return this->buffer_size - this->fresh_queue.size();

View file

@ -23,9 +23,12 @@
**************************************************************************
*/
#include <memory>
#include <pqxx/pqxx>
#include <functional>
#include <cluster/db_types.hpp>
#include <sstream>
#include "task_interface.hpp"
#include "util.h"
#include "uploader.hpp"
@ -60,7 +63,7 @@ void Uploader::thread_run() {
// program was halted
this->interface.wait_for_finished();
DEBUG_PRINT("Uploader was worken up");
DEBUG_PRINT("Uploader was woken up");
// so first we check if we should still be running
if (!this->interface.running()) break;
@ -117,29 +120,73 @@ void Uploader::thread_run() {
bool Uploader::upload_task(std::unique_ptr<OutputType> task) {
auto&& task_id = task->id;
auto&& sim_log = task->get_content().first;
auto&& sim_error = task->get_content().second;
auto&& output_token_timings = task->get_output_token_timings();
auto&& output_tokens = task->output_tokens;
auto log_size = sim_log.size() + sim_error.size();
const auto&& fault_flags = build_fault_flags(task);
// make sure any task that is uploaded isn't halted in the database
auto task_upload_lambda = [](
pqxx::work *txn,
const db::uuid_t *target,
std::vector<std::string> *sim_log,
std::vector<std::string> *sim_error
auto task_upload_lambda = [ task_id,
sim_log,
sim_error,
output_tokens,
output_token_timings,
fault_flags,
log_size
](
pqxx::work *txn
) {
txn->exec_params0(
"UPDATE sim_outputs SET sim_log = $1, error_log = $2, part_status = 'finished' WHERE id = $3 AND part_status != 'halted';",
*sim_log,
*sim_error,
*target
"UPDATE sim_outputs SET "
" sim_log = $1, "
" error_log = $2, "
" output_tokens = $3, "
" output_token_timings = $4, "
" fault_flags = $5, "
" log_size = $6, "
" part_status = 'finished' "
"WHERE id = $7 AND part_status != 'halted';",
sim_log,
sim_error,
output_tokens,
output_token_timings,
fault_flags,
log_size,
task_id
);
};
std::function<void(
pqxx::work*,
const db::uuid_t*,
std::vector<std::string>*,
std::vector<std::string>*
)> task_upload_func = task_upload_lambda;
std::function<void(pqxx::work*)> task_upload_func = task_upload_lambda;
DEBUG_PRINT("Updating task " + db::to_string(task->id));
return this->conn->send_request(&task_upload_func, &(task->id), &(task->get_content().first), &(task->get_content().second));
return this->conn->send_request(&task_upload_func);
}
/**
 * @brief Encode the task's fault flags as a 6-character bit string.
 *
 * Bit positions (left-most character is the highest bit):
 *   5: token count, 4: deadlock, 3: glitch,
 *   2: coding, 1: value, 0: timing deviation
 * @param task The finished task whose flags are encoded.
 * @return A string of '0'/'1' characters, e.g. "100001".
 */
std::string Uploader::build_fault_flags(const std::unique_ptr<OutputType>& task) {
    std::string flags;
    flags.reserve(6);
    flags.push_back(task->fault_token_count ? '1' : '0');
    flags.push_back(task->fault_deadlock ? '1' : '0');
    flags.push_back(task->fault_glitch ? '1' : '0');
    flags.push_back(task->fault_coding ? '1' : '0');
    flags.push_back(task->fault_value ? '1' : '0');
    flags.push_back(task->fault_timing_deviation ? '1' : '0');
    return flags;
}

View file

@ -23,6 +23,8 @@
**************************************************************************
*/
#include <cstddef>
#include <cstring>
#include <iostream>
#include <unistd.h>
#include <signal.h>
@ -30,6 +32,7 @@
#include <errno.h>
#include <cstdlib>
#include "util.h"
#include "log_parser.hpp"
#include "worker.hpp"
/// @brief Construct a worker bound to the task interface it pulls work from.
/// @param interface Task source shared with the rest of the agent.
///        NOTE(review): appears to be stored by reference, so it must outlive
///        this Worker — confirm against the class declaration.
Worker::Worker(TaskInterface& interface) : interface(interface) {}
@ -73,6 +76,9 @@ void Worker::thread_run() {
// get the design this task uses; we'll need that later
auto design = task->design;
// get the reference as well; here it's not yet important if the test actually has one
auto reference = task->reference;
// everything is good, perform the given task
bool complete;
auto output = this->perform_task(task, complete);
@ -89,6 +95,11 @@ void Worker::thread_run() {
// if this succeeded, we can decrease the number of
// tasks that require the design we needed for this task
this->interface.decrement_design(design);
// in case this run was compared to a reference, handle that ref counter too
if (reference != 0) {
this->interface.decrement_reference(reference);
}
} else {
// there are two possible reasons the task was not finished
@ -98,6 +109,11 @@ void Worker::thread_run() {
// we got interrupted since, the current task was halted; in this case
// we only wanna decrease our reference counter
this->interface.decrement_design(task->design);
if (reference != 0) {
this->interface.decrement_reference(reference);
}
this->task_interrupted.store(false, std::memory_order_relaxed);
} else {
DEBUG_PRINT("Something went wrong during task execution");
@ -110,18 +126,12 @@ void Worker::thread_run() {
}
}
/// @brief Report a failed pipe setup and abort task execution.
///
/// Prints a generic pipe-creation error, marks the task as not finished via
/// the out-parameter, and returns no output artifact so the caller can bail
/// out of spawning the actsim child process.
/// @param finished Out-parameter; set to false to signal the task did not run.
/// @return Always nullptr (no simulation output was produced).
inline std::unique_ptr<OutputType> Worker::pipe_error(bool& finished) {
std::cerr << "Error: Pipe creation failed. No actsim process can be spawned." << std::endl;
finished = false;
return nullptr;
}
std::unique_ptr<OutputType> Worker::perform_task(std::unique_ptr<InputType>& task, bool& finished) {
if (task->get_content().size() != 1) {
std::cerr << "Error: Simulation configuration in worker thread has more than one testcases to run!" << std::endl;
finished = false;
return std::make_unique<OutputType>(task->id, task->source_pass, task->target_artifact, task->design, task->source_config);
return std::make_unique<OutputType>(task->id, task->source_pass, task->target_artifact, task->design, task->reference, task->source_config);
}
auto testcase = task->get_content().front();
@ -152,26 +162,41 @@ std::unique_ptr<OutputType> Worker::perform_task(std::unique_ptr<InputType>& tas
// Pipe creation needs some error handling just in case
if (pipe(stdin_pipe) < 0) {
return pipe_error(finished);
std::cerr << "Error: Pipe creation failed for stdin pipe. " << strerror(errno) << std::endl;
finished = false;
return nullptr;
}
if (pipe(stdout_pipe) < 0) {
return pipe_error(finished);
std::cerr << "Error: Pipe creation failed for stdout pipe. " << strerror(errno) << std::endl;
finished = false;
return nullptr;
}
if (pipe(stderr_pipe) < 0) {
return pipe_error(finished);
std::cerr << "Error: Pipe creation failed for stderr pipe. " << strerror(errno) << std::endl;
finished = false;
return nullptr;
}
// our side needs nonblocking access to the pipes
if (fcntl(stdin_pipe[WRITE_END], F_SETFL, O_NONBLOCK) < 0) {
return pipe_error(finished);
std::cerr << "Error: Could not set stdin pipe to nonblocking. " << strerror(errno) << std::endl;
finished = false;
return nullptr;
}
if (fcntl(stdout_pipe[READ_END], F_SETFL, O_NONBLOCK) < 0) {
return pipe_error(finished);
std::cerr << "Error: Could not set stdout pipe to nonblocking. " << strerror(errno) << std::endl;
finished = false;
return nullptr;
}
if (fcntl(stderr_pipe[READ_END], F_SETFL, O_NONBLOCK) < 0) {
return pipe_error(finished);
std::cerr << "Error: Could not set stderr pipe to nonblocking. " << strerror(errno) << std::endl;
finished = false;
return nullptr;
}
DEBUG_PRINT("Starting simulator...");
pid_t pid;
@ -266,8 +291,12 @@ std::unique_ptr<OutputType> Worker::perform_task(std::unique_ptr<InputType>& tas
auto bin_str = std::string(std::getenv("ACT_HOME")) + "/bin/actsim";
char* bin = new char[bin_str.length() + 1];
std::strcpy(bin, bin_str.c_str());
std::string arg_str = "-m";
char* arg = new char[arg_str.length() + 1];
std::strcpy(arg, arg_str.c_str());
char* const argv[] = {bin, design_char, top_proc_char, (char*)0};
char* const argv[] = {bin, arg, design_char, top_proc_char, (char*)0};
// and call actsim
execv(argv[0], argv);
@ -292,28 +321,33 @@ std::unique_ptr<OutputType> Worker::perform_task(std::unique_ptr<InputType>& tas
// Close all the child process facing pipe ends
// since this is the parent process, we have to do all the stuff we did before
if (close(stdin_pipe[READ_END]) < 0) {
return pipe_error(finished);
if (close(stdin_pipe[READ_END]) < 0 ||
close(stdout_pipe[WRITE_END]) < 0 ||
close(stderr_pipe[WRITE_END]) < 0
) {
std::cerr << "Error: Could not close parent facing pipe ends. " << strerror(errno) << std::endl;
finished = false;
return nullptr;
}
if (close(stdout_pipe[WRITE_END]) < 0) {
return pipe_error(finished);
}
if (close(stderr_pipe[WRITE_END]) < 0) {
return pipe_error(finished);
}
// create the output artifact
result = std::make_unique<OutputType>(
task->id,
task->source_pass,
task->target_artifact,
task->design,
task->reference,
task->source_config
);
// create the output parser
std::unique_ptr<LogParser> parser;
if (task->reference == 0) {
parser = std::make_unique<LogParser>(result);
} else {
parser = std::make_unique<LogParser>(result, this->interface.get_reference(task->reference));
}
std::vector<std::string>& commands = task->get_content()[0].commands;
size_t command_n = 0;
size_t last_pos = 0;
@ -339,15 +373,15 @@ std::unique_ptr<OutputType> Worker::perform_task(std::unique_ptr<InputType>& tas
if (command_n < commands.size()) {
std::string& cur_command = commands[command_n];
const char* command_buffer = (cur_command.substr(last_pos, cur_command.length()) + "\n").c_str();
size_t command_length = commands[command_n].length() + 1;
auto remaining_command = cur_command.substr(last_pos, cur_command.length()) + "\n";
size_t command_length = remaining_command.length();
// make sure we don't send more than the pipe can actually hold
if (rem_pipe_capacity < command_length) {
last_pos = last_pos + rem_pipe_capacity;
rem_pipe_capacity = write(stdin_pipe[WRITE_END], command_buffer, rem_pipe_capacity);
rem_pipe_capacity = write(stdin_pipe[WRITE_END], remaining_command.c_str(), rem_pipe_capacity);
} else {
rem_pipe_capacity = write(stdin_pipe[WRITE_END], command_buffer, command_length);
rem_pipe_capacity = write(stdin_pipe[WRITE_END], remaining_command.c_str(), command_length);
last_pos = 0;
++command_n;
}
@ -375,7 +409,7 @@ std::unique_ptr<OutputType> Worker::perform_task(std::unique_ptr<InputType>& tas
// make sure any remaining output is added to the log
if (stdout_buf != "") {
result->add_log_output(stdout_buf);
parser->parse_log(stdout_buf);
}
DEBUG_PRINT("STDOUT was closed by child");
@ -390,7 +424,7 @@ std::unique_ptr<OutputType> Worker::perform_task(std::unique_ptr<InputType>& tas
auto pos = stdout_buf.find('\n');
while (pos != std::string::npos) {
DEBUG_PRINT("Log output line was added");
result->add_log_output(stdout_buf.substr(0, pos));
parser->parse_log(stdout_buf.substr(0, pos));
if ((pos + 1) < stdout_buf.length()) {
stdout_buf = stdout_buf.substr(pos+1, stdout_buf.length());
@ -425,7 +459,7 @@ std::unique_ptr<OutputType> Worker::perform_task(std::unique_ptr<InputType>& tas
// make sure any remaining output is added to the log
if (stderr_buf != "") {
result->add_err_output(stderr_buf);
parser->parse_error(stderr_buf);
}
DEBUG_PRINT("STDERR was closed by child");
@ -440,7 +474,7 @@ std::unique_ptr<OutputType> Worker::perform_task(std::unique_ptr<InputType>& tas
auto pos = stderr_buf.find('\n');
while (pos != std::string::npos) {
DEBUG_PRINT("Error output line was added");
result->add_err_output(stderr_buf.substr(0, pos));
parser->parse_error(stderr_buf.substr(0, pos));
if ((pos + 1) < stderr_buf.length()) {
stderr_buf = stderr_buf.substr(pos+1, stderr_buf.length());
@ -467,8 +501,25 @@ std::unique_ptr<OutputType> Worker::perform_task(std::unique_ptr<InputType>& tas
finished = true;
break;
}
// check if we need to abort due to a busy deadlock
if (parser->check_busy_deadlock()) {
finished = true;
kill(pid, SIGKILL);
break;
}
}
parser->finalize();
}
// Close all the remaining pipes
if (close(stdin_pipe[WRITE_END]) < 0 ||
close(stdout_pipe[READ_END]) < 0 ||
close(stderr_pipe[READ_END]) < 0
) {
std::cerr << "Error: Could not close child facing pipe ends. " << strerror(errno) << std::endl;
}
delete[] design_char;