###-----------------------------------------------------------------------------
### Packages
###-----------------------------------------------------------------------------
if ((!(requireNamespace("metabefor", quietly = TRUE))) ||
    (packageVersion("metabefor") < "0.3")) {
  stop("You need to have at least version 0.3 of the `metabefor` package installed; ",
       "install it with:\n\ninstall.packages('metabefor');");
}

metabefor::checkPkgs(
  "here",        ### For easy access to files using 'relative paths'
  "preregr",     ### For specifying (pre)registrations
  "synthesisr",  ### For working with bibliographic search results
  "ggplot2"      ### For plotting
);
### Potentially update to the development version of some packages
# ufs::quietGitLabUpdate("r-packages/preregr@dev", quiet = FALSE);
# remotes::install_git("https://codeberg.org/R-packages/rock");
# remotes::install_git("https://codeberg.org/R-packages/metabefor");
# devtools::load_all("C:/git/R/metabefor");
# ufs::quietRemotesInstall("rmetaverse/synthesisr",
# func = "install_github", quiet = FALSE);
###-----------------------------------------------------------------------------
### Paths
###-----------------------------------------------------------------------------
basePath       <- here::here();
preregPath     <- file.path(basePath, "prereg");
scriptPath     <- file.path(basePath, "scripts");
searchPath     <- file.path(basePath, "search");
screeningPath  <- file.path(basePath, "screening");
extractionPath <- file.path(basePath, "extraction");
rxsSpecPath    <- file.path(basePath, "extraction-Rxs-spec");
outputPath     <- file.path(basePath, "output");
###-----------------------------------------------------------------------------
### Settings
###-----------------------------------------------------------------------------
knitr::opts_chunk$set(
  echo = TRUE,
  comment = ""
);
###-----------------------------------------------------------------------------
### Extraction script Google sheets URL
###-----------------------------------------------------------------------------
rxsSpec_googleSheetsURL <-
  paste0("https://docs.google.com/spreadsheets/d/",
         "1hNu8IC1Y8bIXq-Bjgm5VFfNOiTEx1OunO5Cp3rmSO6g");
⛏️ARCHEOLOGISTS⛏️ 💞EMPATHS💞 1
Introduction & setup
The preregistration for this document is at .
This is EMPATHS-1 (EMPATHS stands for Empathy: Machine-readable Publications to Analyze, Teach, Hypothesize, and Synthesize). EMPATHS-1 is an ARCHEOLOGISTS project (see https://archeologists.opens.science).
Here is the Codeberg repo for this project, here is the URL to the rendered version of this R Markdown file at Codeberg Pages, and here is the URL to the Open Science Framework project. The main Google Docs file for this project is here.
The extraction procedure is here, the PDF with the extraction instructions is located here, and the Rxs template is located here.
Note: this file was based on NITRO, the Narrated Illustration of a Transparent Review Outline, which accompanies the SysRevving book and the metabefor package. Throughout this file, links to the corresponding SysRevving chapters will be provided. For general reference, you may want to keep the SysRevving glossary ready.
Setup
Here we check for the required packages (without loading them into R’s search path with library() or require(), to safeguard against accidentally forgetting to use the package::function() syntax), specify the paths, and set script-wide settings.
Planning
Research Question
(link to corresponding SysRevving chapter)
Example: The research question is whether the exponential explosion of the scientific literature is also reflected in a growing evidence base for health promotion interventions targeting recreational substance use.
Planning: Synthesis
(link to corresponding SysRevving chapter)
Example: To answer the research question, our synthesis will consist of a plot with years on the X axis, cumulative number of publications on the Y axis, and separate, differently colored lines for each substance.
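To make this example concrete, here is a minimal, hedged sketch of such a plot (not part of the original planning; the data frame pubCounts and its counts are purely illustrative), using ggplot2 with the package::function() notation used throughout this script:
### Hedged sketch only: `pubCounts` is a hypothetical data frame with the number of
### publications per substance per year, filled here with made-up numbers.
pubCounts <- data.frame(
  year = rep(2000:2004, times = 2),
  substance = rep(c("ecstasy", "cocaine"), each = 5),
  n = c(1, 2, 4, 7, 11, 2, 3, 5, 8, 13)
);
### Cumulative number of publications per substance (rows are already ordered by year)
pubCounts$cumulativePublications <-
  ave(pubCounts$n, pubCounts$substance, FUN = cumsum);
ggplot2::ggplot(pubCounts,
                ggplot2::aes(x = year,
                             y = cumulativePublications,
                             color = substance)) +
  ggplot2::geom_line() +
  ggplot2::labs(x = "Year",
                y = "Cumulative number of publications",
                color = "Substance");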
Planning: Extraction
(link to corresponding SysRevving chapter)
Example: The R extraction script specification (Rxs spec) is stored in this Rxs spec Google Sheet. The chunks below load it, convert it into the Rxs template (which will then be copied and completed for each source from which data are extracted), and show these specifications.
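As a hedged sketch of that loading step (the function name rxs_fromSpecifications() and its arguments are assumptions; check the metabefor documentation for the exact call used in the actual chunks), it could look roughly like this:
### Sketch only: the exact metabefor function name and arguments are assumptions
### and should be checked against the metabefor documentation.
rxsSpecObject <-
  metabefor::rxs_fromSpecifications(
    rxsSpec_googleSheetsURL
  );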
R extraction script specification
Extractor instructions
metabefor::write_extractor_instructions(
  rxsSpecObject
);
Extractor instructions
Welcome!
Welcome to the extraction instructions for EMPATHS-1, the first systematic review in the EMPATHS project. If this is new to you, you may want to start at https://archeologists.opens.science/empaths.html. This PDF with extraction instructions is available from https://archeologists.opens.science/empaths-1/extractor-instructions. In this project, the focus is on construct definitions and measurement methods. Therefore, during extraction, these are the main entities that you will spend time on. In addition to the brief extraction instructions specified in these instructions and in the extraction script (.Rxs file), where you will register the extracted data, more extensive instructions are provided here.
Please start by reading these instructions carefully, as well as the extraction instructions at https://archeologists.opens.science/empaths-1/extraction. The instructions have two parts: this first part contains general instructions. The second part, starting from “Entity overview (list)”, contains entity-specific extraction instructions that will also be included in the Rxs Template (the “R Extraction Script Template”). That template is where you will conduct the extraction.
You will start any extraction by copying that template file to a new filename, and then opening that new file to enter the extracted information.
Naming the Rxs file
The filename should follow this format:
“name_year_sourceId_extractorId.rxs.rmd”
Where ‘name’ is the first word in the last (family) name of the first author, stripped of all characters other than a-z or A-Z; ‘year’ is the year of publication of the source; ‘sourceId’ is the source’s unique identifier (the ShortDOI if available; otherwise, the QURID); and ‘extractorId’ is the extractor’s unique identifier (i.e., your identifier).
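As a purely hypothetical example (the identifiers are made up for illustration): a source whose first author’s family name is Smith, published in 2021, with ShortDOI gk7xp2, and extracted by an extractor with identifier jg01, would be named smith_2021_gk7xp2_jg01.rxs.rmd (matching the lowercase filenames generated for the tracking sheet below).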
General extraction instructions
To extract information from the sources, you scroll through the Rxs Template (that you just stored under a new name) and specify what you found for each entity.
You usually enter information by replacing the NULL with the entity content.
Usually, if something is not reported, replace the NULL with NA (also without quotes).
If you extract a number, you can usually just replace the NULL with that number. If you extract text, make sure to use double quotes around the text string.
Sometimes, you can extract multiple values (you can see this in the entity extraction instructions or in the instructions for the value template). In that case, you place them within a “concatenator” or “combiner”: c(). For example, c(1, 2, 3) for numbers, or c(“one”, “two”, “three”) for text strings.
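As a purely hypothetical illustration of these conventions (the layout of entities in the actual Rxs template may differ; the values are made up), a few completed entities could conceptually look like this:
### Hypothetical illustration only; consult the Rxs template itself for the real layout.
exampleExtraction <- list(
  language = "eng",                              ### A single text value, in double quotes
  population_species = c("human", "synthetic"),  ### Multiple values, combined with c()
  involvesManipulation = NA                      ### Not reported, so NA (without quotes)
);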
Validating your extraction results
Once you have completed an R Extraction Script (.Rxs.Rmd file), you can (and should) immediately verify whether everything went well. There are two ways to do this. First, there’s the Extraction Validation App (EVA). EVA lives at https://opens.science/apps/eva and can validate your completed extraction script regardless of where you performed the extraction. Second, if you performed the extraction in RStudio, you can render the extraction script with CTRL-ALT-K. This will also produce the validation report, showing which entities validated and what is imported when your extraction script is parsed.
Extract construct definitions
You extract the empathy definition in a so-called “clustering entity” or “list entity”: a set of closely related entities placed closely together in the Rxs template. If you don’t use RStudio for extraction (and so do not benefit from syntax coloring), this can look a bit confusing; clustering entities contain the entity itself (often with default value NULL), immediately followed on the same line by a comment (starting with three hashes, “###”) with the entity’s description, extraction instruction, and the corresponding value template description. Because this is a lot of text, editors (such as Notepad++ and RStudio) will often apply soft word wrapping (splitting long lines and displaying them over multiple lines to prevent them from disappearing off the right side of the screen). You may want to study this clustering entity closely the first time.
The first entity in this clustering entity is the Empathy Construct Identifier for this specific definition. This is used to allow multiple empathy constructs to be defined in (and extracted from) the same source. However, you have to specify an Empathy Construct Identifier even if a source only has one definition: in other words, you always have to specify it. Remember: identifiers can only contain letters, digits, and underscores, and must start with a letter.
The second entity in this clustering entity is the empathy definition. When looking for a definition of empathy, start by using your source viewer (e.g. if the source is in PDF format, it may be your browser (e.g. Firefox) or a dedicated PDF viewer such as Sumatra or Adobe Acrobat) and use the search/find functionality to look for the text string “empathy” (assuming the source was written in English). Ignore definitions in the abstract. If the first occurrence of the construct name is accompanied by its definition, as the authors use it in their work, copy that definition into the extraction script. An example of a (very very brief) definition you might encounter is “Empathy is the ability to understand and relate to the emotions and experiences of others and to effectively communicate that understanding.” (note that definitions can also be much longer).
However, if the first occurrence is accompanied by a definition that the authors discuss, but not as a definition they use themselves but rather e.g. to introduce readers to the definitions that exist, move to the next occurrence. Similarly, if the first occurrence of the word is not accompanied by a definition at all, move to the next occurrence. For each occurrence, repeat this evaluation: are the authors defining what exactly empathy is? In other words, which parts of the human psyche they consider constituting empathy, and which they consider to reflect other constructs?
Once you have extracted the first fragment (i.e. one or more sentences), repeat your search to see whether the authors provide additional aspects of their definition further on in the introduction. If they do, extract those as well. Extract fragments that occur at different places in the text as separate text elements (e.g. c("first bit", "second bit")).
If the authors do not provide an explicit definition, then they may instead cite another source (e.g. an article or a book) and refer to the definition there as the one they use. In that case, obtain the shortdoi for that source, and extract that, in the full URL form (e.g. “https://doi.org/gf6btx”). This will enable us to later automatically identify all such URLs, and so categorize sources as either providing their own definition, providing no definition, or citing a definition from elsewhere in the literature (as well as compile a list of such references). If they cite a source that does not have a DOI, consult with the EMPATHS-1 coordinators, Jennifer Gutsell and/or Gjalt-Jorn Peters.
If the authors do not define empathy but also do not cite another source as providing the definition they use, extract NA to signify that the definition is missing from the source. Similarly, if authors are not explicit about their definition, extract NA. If authors only provide a definition of empathy in the abstract, report that in the comments field in this clustering entity.
If a source is written in a language that you do not understand, extract “lang” as construct definition. This will allow us to later try to find somebody who can read that language.
Finally, some sources may contain multiple empathy constructs. In that case, extract them into separate entities. To do this, copy the block starting with the line containing “START: empathyConstruct (REPEATING)” and ending with the line containing “END: empathyConstruct (REPEATING)”. Then complete the entities for the second empathy construct, and repeat until you have extracted all different empathy constructs in the source. (An example of such a paper is ns9s; see https://doi.org/ns9s for the PDF and [URL] for the completed Rxs file.)
Extracting a measurement or manipulation instrument
When extracting a measurement or manipulation (entities empathyMeasureId and empathyManipulationId), you specify their unique identifier. This identifier is taken from https://archeologists.opens.science/empathy-measures (from the column marked “identifier”). If the instrument you’re extracting is already in the list, you can just specify the relevant identifier in the extraction script.
However, if it does not yet exist, you have to add it. To do this, visit https://opens.science/apps/elsa, create an identifier, and add it to the first column. Then specify the rest of the information as described in the section “Specifying measurement instruments and/or manipulations” at https://archeologists.opens.science/extraction.
Just like definitions, a study can contain multiple measurement instruments or manipulation instruments. Again, copy the relevant block: from the line with “START: empathyMeasure (REPEATING)” to the line with “END: empathyMeasure (REPEATING)” for multiple measures, and from the line with “START: empathyManipulation (REPEATING)” to the line with “END: empathyManipulation (REPEATING)” for multiple manipulations.
Conversely, a study may not contain any measurement instruments or manipulations. In that case, you can specify “noMeasure” as the value of empathyMeasureId and leave empathyMeasureConstructId as NULL, or specify “noManipulation” as empathyManipulationId and leave empathyManipulationConstructId as NULL.
Extracting multiple studies
Sometimes, a source reports on multiple studies. If the studies use different measurement instruments or manipulation instruments, copy the study block like you may have copied definition blocks, measurement instrument blocks, or manipulation instrument blocks before. However, study blocks are larger, and themselves contain ‘repeating’ container entities (specifically, the measurement instrument blocks and the manipulation instrument blocks are specified within the respective study).
To copy the study block, copy the lines from the line with “START: singleStudyContainer (REPEATING)” to the line with “END: singleStudyContainer (REPEATING)”. As you’ll see, this is quite a large part of the Rxs file. Also note that you may have to specify the population for each study separately.
How to create an identifier
To create a unique identifier for a TOM, TOQ, or TOI, you can either use the R package {psyverse} or the Elsa app. To use Elsa, visit https://opens.science/apps/elsa. Identifiers have the following format. They start with a brief lowercase sequence of letters that is often an acronym or abbreviation of the instrument’s name (e.g. ‘iri’, ‘bespt’, and ‘epitome’). This is followed by a number: the number of items in the measurement instrument; 0 for a manipulation; or 00 for continuous measurement such as EEG. That is followed by the language of the measurement instrument in ISO 639-3 code (see the extraction instructions for extracting the language a source was written in). That is followed by an underscore, and then the last identifier bit as produced by Elsa.
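For example, reading the spec name eq60eng_7rs8g3bd (used as an example further below) against this format: “eq” is the abbreviation of the instrument’s name, “60” is the number of items, “eng” is the ISO 639-3 code for English, and “7rs8g3bd” is the final identifier bit produced by Elsa.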
Future reference
In the future, we will specify the extracted measurement instruments and manipulations in an open repository. For that stage a number of instructions are included here. During the extraction phase of this project, you can ignore this; this is simply retained here for future reference.
If it is a questionnaire, you can choose to specify it as a TOQ (“Tabulated Open Questionnaire”) specification, which enables importing it into the questionnaire repository at https://operationalizations.com. This is not yet possible for measurement instruments that do not consist of questions, nor for manipulations; those have to be specified as TOM (“Tabulated Open Metadata”) specifications. Depending on what you choose, follow the corresponding set of instructions below.
Minimal specification of a measurement or manipulation instrument
To specify a TOM (“Tabulated Open Metadata”) specification, you need to complete these steps:
- visit https://archeologists.opens.science/empathy-tabulated-specs
- open “TOM-spec—bespt0eng_7rtpjgf3”
- save a copy under a different name but in the same folder.
- create an identifier prefix (see the procedure below for details) and enter it in cell B3
- visit https://opens.science/apps/elsa, enter the prefix, and create an identifier
- enter the result in cell B4 as UMID
- complete the other fields
- open the spreadsheet at https://archeologists.opens.science/empathy-measures again and add a row with the UMID you just created
Full specification of a questionnaire
To specify a TOQ (“Tabulated Open Questionnaire”) specification, you need to complete these steps:
- visit https://archeologists.opens.science/empathy-tabulated-specs
- open “TOQ-spec—eq60eng_7rs8g3bd”
- save a copy under a different name but in the same folder.
- create an identifier prefix (see the procedure below for details) and enter it in cell B3
- visit https://opens.science/apps/elsa, enter the prefix, and create an identifier
- enter the result in cell B4 as UQID
- complete the other fields
- open the spreadsheet at https://archeologists.opens.science/empathy-measures again and add a row with the UQID you just created
How to create an identifier
To create a unique identifier for a TOM, TOQ, or TOI, you can either use the R package {psyverse} or the Elsa app. To use Elsa, visit https://opens.science/apps/elsa. Identifiers have the following format. They start with a brief lowercase sequence of letters that is often an acronym or abbreviation of the instrument’s name (e.g. ‘iri’, ‘bespt’, and ‘epitome’). This is followed by a number: the number of items in the measurement instrument; 0 for a manipulation; or 00 for continuous measurement such as EEG. That is followed by the language of the measurement instrument in ISO 639-3 code (see the extraction instructions for extracting the language a source was written in). That is followed by an underscore, and then the last identifier bit as produced by Elsa.
Entity overview (list)
This is an overview of the entities to extract, their titles and descriptions, and other details that will become part of the extraction script template that will be used for the actual extraction.
General
General information
Type: Entity Container
Identifier: general
Path in extraction script tree: source > general
Repeating: FALSE
QURID
Quasi-Unique Record Identifier (QURID).
Extraction instructions: This is already available in the tracking sheet; a QURID was added to every record. We will use this to automatically import bibliographic information available in that file, such as title, keywords, potentially abstract, etc.
Type: Extractable Entity
Identifier: qurid
Value description: A single character value that is used as an identifier and so is always mandatory and can only contain a-z, A-Z, 0-9, and underscores, and must start with a letter.
Path in extraction script tree: source > general > qurid
Value template: string_identifier
Repeating: FALSE
Language
The language in which the article is written as ISO 639-3 code (e.g., to list the 10 most spoken languages: “eng” for English, “zho” for Chinese, “hin” for Hindi, “spa” for Spanish, “fra” for French, “ara” for Arabic, “ben” for Bengali, “por” for Portuguese, “rus” for Russian, and “urd” for Urdu).
Extraction instructions: Use ISO 639-3 to extract this (see https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes and https://en.wikipedia.org/wiki/ISO_639-3).
Type: Extractable Entity
Identifier: language
Value description: A single character value
Path in extraction script tree: source > general > language
Value template: string
Repeating: FALSE
Empirical
Whether this source reports on one or more empirical studies (i.e. studies where data were created in the context of the study that authors report on, for example through experimentation, observation, simulation, or similar means).
Extraction instructions: Extract “yes” if this source reports results from at least one empirical study. Extract “no” if it does not report results from an empirical study. Extract “unclear” if you are not sure whether results from an empirical study are reported.
Note that since the species under study can be synthetic, collecting data from generative AI or a simulation also counts as empirical.
Type: Extractable Entity
Identifier: empirical
Value description: A string that has to exactly match one of the values specified in the “values” column of the Coding sheet, and that can be omitted (i.e. is allowed to be NULL).
Path in extraction script tree: source > general > empirical
Value template: categorical_omittable
Repeating: FALSE
Empathy Constructs
This container entity is used to extract information about the various empathy constructs studied in this source.
Type: Entity Container
Identifier: empathyConstructs
Path in extraction script tree: source > empathyConstructs
Repeating: FALSE
Empathy Construct
This clustering entity contains information about one single empathy construct as defined in this source. Note that we take a broad view of empathy constructs; this also includes empathy not as a part of the human psyche, but as it may be perceived to be expressed in, for example, a text or recording.
Type: Extractable Entity List
Identifier: empathyConstruct
Empathy Construct Identifier | This is a unique identifier for this empathy construct. It can be used elsewhere in this extraction script to refer to this construct (for example when extracting measurement instruments or manipulations). |
Empathy Definition | The definition of empathy the authors use. |
Empathy Definition Confidence | How confident you are that the definition you extracted is indeed how the authors defined empathy in this source. |
Empathy Construct Type | The type of empathy construct: psychological construct or not. |
Empathy Definition Notes | Any notes you want to specify. |
Path in extraction script tree: source > empathyConstructs > empathyConstruct
Repeating: TRUE
Methods
This container entity holds entities related to the methods used by the study.
Type: Entity Container
Identifier: methods
Path in extraction script tree: source > methods
Repeating: FALSE
Reported Studies
This container entity holds the studies reported on in this source.
Type: Entity Container
Identifier: reportedStudies
Path in extraction script tree: source > reportedStudies
Repeating: FALSE
Single Study
This container entity contains information about a single study. This is important because some sources report on multiple studies.
Type: Entity Container
Identifier: singleStudyContainer
Path in extraction script tree: source > reportedStudies > singleStudyContainer
Repeating: TRUE
Population
Information about the population of this study.
Type: Entity Container
Identifier: population
Path in extraction script tree: source > reportedStudies > singleStudyContainer > population
Repeating: FALSE
Species
Whether the sample was drawn from humans or non-human populations.
Extraction instructions: Extract “human” if the sample description in the methods section indicates a human sample. Extract “animal” if none of the sample descriptions in the methods sections of the reported studies indicates a human sample. Extract “synthetic” if the data were produced by an automated algorithm (e.g. a simulation such as a large language model or an agent-based model). If another species was studied, extract “other” and then also specify that species in the “population_species_other” entity. If the collected data were produced by multiple species, extract all species as a vector (see the examples).
Type: Extractable Entity
Identifier: population_species
Value description: A vector of strings where each element has to exactly match one of the values specified in the “values” column of the Coding sheet
Path in extraction script tree: source > reportedStudies > singleStudyContainer > population > population_species
Value template: categorical_multi
Repeating: FALSE
Other Species
If the species was specified as “other”, then extract as this entity the text fragment where the authors describe the species they studied.
Extraction instructions: Extract the literal text the authors use; if the species was not extracted as “other”, extract this as NA.
Type: Extractable Entity
Identifier: population_species_other
Value description: A single character value; can be NA or even NULL
Path in extraction script tree: source > reportedStudies > singleStudyContainer > population > population_species_other
Value template: string_omittable
Repeating: FALSE
Manipulation
Whether the source involves a manipulation of empathy (or intervention, behavior change method, therapy component, etc).
Extraction instructions: Assess whether the source introduces or involves a procedure designed to increase, decrease, or otherwise alter the research units’ empathy (i.e. the humans or animals that are studied). This can be called a manipulation in experimental psychology, a behavior change method, technique or principle in behavior change science, or a therapy component in clinical psychology. Other terms are also possible of course: the key is whether the procedure or stimulus was designed to influence empathy. If you conclude that such a procedure or stimulus is described in the source as one of the focal topics, extract “yes”. If you conclude that no such procedure or stimulus is described, extract “no”. If it is unclear whether that is the case, extract “unclear”. If nothing is reported that allows you to draw any conclusions, extract NA (without quotes).
Type: Extractable Entity
Identifier: involvesManipulation
Value description: A string that has to exactly match one of the values specified in the “values” column of the Coding sheet, and that can be omitted (i.e. is allowed to be NULL).
Path in extraction script tree: source > reportedStudies > singleStudyContainer > involvesManipulation
Value template: categorical_omittable
Repeating: FALSE
Empathy Measures
This container entity holds entities specifying how empathy was measured.
Type: Entity Container
Identifier: empathyMeasures
Path in extraction script tree: source > reportedStudies > singleStudyContainer > empathyMeasures
Repeating: FALSE
Empathy Measure
Container entity for this empathy measure.
Type: Extractable Entity List
Identifier: empathyMeasure
Empathy Measure Identifier | The identifier for the empathy measure that was used to measure empathy in this study in this source. |
Measured Construct | The identifier of the construct as entered in its extracted definition above. |
Path in extraction script tree: source > reportedStudies > singleStudyContainer > empathyMeasures > empathyMeasure
Repeating: TRUE
Empathy Manipulations
This container entity holds entities specifying how empathy was manipulated.
Type: Entity Container
Identifier: empathyManipulations
Path in extraction script tree: source > reportedStudies > singleStudyContainer > empathyManipulations
Repeating: FALSE
Empathy Manipulation
Container entity for this empathy manipulation.
Type: Extractable Entity List
Identifier: empathyManipulation
Empathy Manipulation Identifier | The identifier for the empathy manipulation that was used to manipulate empathy in this study in this source. |
Manipulated Construct | The identifier of the construct as entered in its extracted definition above. |
Path in extraction script tree: source > reportedStudies > singleStudyContainer > empathyManipulations > empathyManipulation
Repeating: TRUE
extractorInstructions <-
  metabefor::write_extractor_instructions(
    rxsSpecObject,
    outputFile = file.path(
      extractionPath,
      "extractor-instructions.pdf"
    )
  );
#install.packages('openalexR');
# works_from_orcids <- openalexR::oa_fetch(
# entity = "works",
# author.orcid = "0000-0002-0336-9589",
# verbose = TRUE
# )
updateQuery <- FALSE;

if (updateQuery) {

  sources <- openalexR::oa_fetch(
    display_name.search = "empathy",
    publication_year = "2023",
    type = "types/article",
    primary_topic.field.id = "fields/32",
    open_access.is_oa = "true",
    output = "list",
    verbose = TRUE
  );

  created_dates <-
    unlist(lapply(sources, function(source) { return(source$created_date) }));

  ### Convert all sources in the first query execution to a data frame
  sourcesDf <-
    openalexR::works2df(sources[created_dates < "2024-11-14"]);

  ### Sanity check
  if (any(duplicated(sourcesDf$id))) {
    stop("Duplicated source identifiers!");
  }

  ### Attach QURIDs; specify origin to ensure replicability
  sourcesDf$QURID <-
    metabefor::generate_qurids(
      nrow(sourcesDf),
      origin = as.POSIXct("2024-11-14 15:54:14 CET")
    );

  ### Prepare author field for further processing
  #metabefor::vecTxt(sourcesDf$author[[1]]$au_display_name)
  sourcesDf$authorString <-
    unlist(lapply(sourcesDf$author,
                  function(x) {
                    if (is.data.frame(x)) {
                      return(metabefor::vecTxt(x$au_display_name));
                    } else if (is.na(x)) {
                      return("No author specified");
                    } else {
                      browser();
                    }
                  }));

  sourcesDf$firstAuthorLastName <-
    trimws(tolower(unlist(lapply(
      sourcesDf$author,
      function(x) {
        if (is.data.frame(x)) {
          return(gsub(".* ", "", x[1, 'au_display_name']));
        } else if (is.na(x)) {
          return("NA");
        } else {
          browser();
        }
      }
    ))));

  ### Correct an erroneous DOI
  sourcesDf$doi[sourcesDf$doi == "10.1145/3570945.xxxxxxx"] <-
    "10.1145/3570945";

  ### Get ShortDOIs
  sourcesDf$shortDOI <-
    metabefor::get_short_dois(
      sourcesDf$doi, silent = FALSE, progress = TRUE
    );

  ### Set source identifiers
  sourcesDf$sourceId <-
    ifelse(
      is.na(sourcesDf$shortDOI),
      sourcesDf$QURID,
      sourcesDf$shortDOI
    );

  ### Set filenames
  sourcesDf$filename <-
    paste0(sourcesDf$firstAuthorLastName, "_",
           sourcesDf$publication_year, "_",
           sourcesDf$sourceId);

  ### Add fields for extraction
  sourcesDf$extractorId <- "";
  sourcesDf$extractionStatus <- "";

  saveRDS(sources, file.path(searchPath, "empaths-1-query-1---sources.rds"));
  saveRDS(sourcesDf, file.path(searchPath, "empaths-1-query-1---sourcesDf.rds"));

} else {

  sources <-
    readRDS(file.path(searchPath, "empaths-1-query-1---sources.rds"));
  sourcesDf <-
    readRDS(file.path(searchPath, "empaths-1-query-1---sourcesDf.rds"));

}
extractionTracking <-
  as.data.frame(
    sourcesDf[
      ,
      c(
        "title",
        "authorString",
        "sourceId",
        "extractorId",
        "extractionStatus",
        "filename",
        "QURID",
        "publication_year",
        "id"
      )
    ]
  );
wb <- openxlsx::createWorkbook();

openxlsx::addWorksheet(wb, "extractionTracking");

openxlsx::writeData(
  wb,
  sheet = "extractionTracking",
  x = extractionTracking
);
### Select URL to find source
sourcesDf$sourceURL <-
  ifelse(
    is.na(sourcesDf$pdf_url),
    sourcesDf$doi,
    sourcesDf$pdf_url
  );
openxlsx::writeFormula(
  wb,
  "extractionTracking",
  x =
    paste0(
      'HYPERLINK("', sourcesDf$sourceURL, '", "', sourcesDf$title, '")'
    ),
  startCol = 1,
  startRow = 2
);

openxlsx::writeFormula(
  wb,
  "extractionTracking",
  x =
    paste0(
      'HYPERLINK("', sourcesDf$id, '", "', sourcesDf$id, '")'
    ),
  startCol = 9,
  startRow = 2
);
### Set column widths
openxlsx::setColWidths(
  wb,
  "extractionTracking",
  cols = 1:9,
  widths = c(50, 30, 10, 10, 10, 20, 12, 10, 30)
);
openxlsx::saveWorkbook(
  wb,
  file.path(extractionPath, "autogenerated---EMPATHS-1---extraction-phase-1---2024-11-14.xlsx"),
  overwrite = TRUE
);
Basic Rxs tree structure
metabefor::show_rxsTree_in_rxsStructure(
  rxsSpecObject,
  output = file.path(outputPath, "extraction-tree.pdf")
);
                                     levelName
1  source
2   ¦--general
3   ¦   ¦--qurid
4   ¦   ¦--language
5   ¦   °--empirical
6   ¦--empathyConstructs
7   ¦   °--empathyConstruct
8   ¦       ¦--empathyConstructId
9   ¦       ¦--empathyConstructDefinition
10  ¦       ¦--empathyConstructConfidence
11  ¦       ¦--empathyConstructType
12  ¦       °--empathyConstructNotes
13  ¦--methods
14  °--reportedStudies
15      °--singleStudyContainer
16          ¦--population
17          ¦   ¦--population_species
18          ¦   °--population_species_other
19          ¦--involvesManipulation
20          ¦--empathyMeasures
21          ¦   °--empathyMeasure
22          ¦       ¦--empathyMeasureId
23          ¦       °--empathyMeasureConstructId
24          °--empathyManipulations
25              °--empathyManipulation
26                  ¦--empathyManipulationId
27                  °--empathyManipulationConstructId
Extraction instructions
cat(rxsSpecObject$rxsInstructions);
Extractor instructions
Welcome!
Welcome to the extraction instructions for EMPATHS-1, the first systematic review in the EMPATHS project. If this is new to you, you may want to start at https://archeologists.opens.science/empaths.html. This PDF with extraction instructions is available from https://archeologists.opens.science/empaths-1/extractor-instructions. In this project, the focus is on construct definitions and measurement methods. Therefore, during extraction, these are the main entities that you will spend time on. In addition to the brief extraction instructions specified in these instructions and in the extraction script (.Rxs file), where you will register the extracted data, more extensive instructions are provided here.
Please start by reading these instructions carefully, as well as the extraction instructions at https://archeologists.opens.science/empaths-1/extraction. The instructions have two parts: this first part contains general instructions. The second part, starting from “Entity overview (list)”, contains entity-specific extraction instructions that will also be included in the Rxs Template (the “R Extraction Script Template”). That template is where you will conduct the extraction.
You will start any extraction by copying that template file to a new filename, and then opening that new file to enter the extracted information.
Naming the Rxs file
The filename should follow this format:
“name_year_sourceId_extractorId.rxs.rmd”
Where ‘name’ is the first word in the last (family) name of the first author, stripped of all characters other than a-z or A-Z; ‘year’ is the year of publication of the source; ‘sourceId’ is the source’s unique identifier (the ShortDOI if available; otherwise, the QURID); and ‘extractorId’ is the extractor’s unique identifier (i.e., your identifier).
General extraction instructions
To extract information from the sources, you scroll through the Rxs Template (that you just stored under a new name) and specify what you found for each entity.
You usually enter information by replacing the NULL with the entity content.
Usually, if something is not reported, replace the NULL with NA (also without quotes).
If you extract a number, you can usually just replace the NULL with that number. If you extract text, make sure to use double quotes around the text string.
Sometimes, you can extract multiple values (you can see this in the entity extraction instructions or in the instructions for the value template). In that case, you place them within a “concatenator” or “combiner”: c(). For example, c(1, 2, 3) for numbers, or c(“one”, “two”, “three”) for text strings.
Validating your extraction results
Once you have completed an R Extraction Script (.Rxs.Rmd file), you can (and should) immediately verify whether everything went well. There are two ways to do this. First, there’s the Extraction Validation App (EVA). EVA lives at https://opens.science/apps/eva and can validate your completed extraction script regardless of where you performed the extraction. Second, if you performed the extraction in RStudio, you can render the extraction script with CTRL-ALT-K. This will also produce the validation report, showing which entities validated and what is imported when your extraction script is parsed.
Extract construct definitions
You extract the empathy definition in a so-called “clustering entity” or “list entity”: a set of closely related entities placed closely together in the Rxs template. If you don’t use RStudio for extraction (and so do not benefit from syntax coloring), this can look a bit confusing; clustering entities contain the entity itself (often with default value NULL), immediately followed on the same line by a comment (starting with three hashes, “###”) with the entity’s description, extraction instruction, and the corresponding value template description. Because this is a lot of text, editors (such as Notepad++ and RStudio) will often apply soft word wrapping (splitting long lines and displaying them over multiple lines to prevent them from disappearing off the right side of the screen). You may want to study this clustering entity closely the first time.
The first entity in this clustering entity is the Empathy Construct Identifier for this specific definition. This is used to allow multiple empathy constructs to be defined in (and extracted from) the same source. However, you have to specify an Empathy Construct Identifier even if a source only has one definition: in other words, you always have to specify it. Remember: identifiers can only contain letters, digits, and underscores, and must start with a letter.
The second entity in this clustering entity is the empathy definition. When looking for a definition of empathy, start by using your source viewer (e.g. if the source is in PDF format, it may be your browser (e.g. Firefox) or a dedicated PDF viewer such as Sumatra or Adobe Acrobat) and use the search/find functionality to look for the text string “empathy” (assuming the source was written in English). Ignore definitions in the abstract. If the first occurrence of the construct name is accompanied by its definition, as the authors use it in their work, copy that definition into the extraction script. An example of a (very very brief) definition you might encounter is “Empathy is the ability to understand and relate to the emotions and experiences of others and to effectively communicate that understanding.” (note that definitions can also be much longer).
However, if the first occurrence is accompanied by a definition that the authors discuss, but not as a definition they use themselves but rather e.g. to introduce readers to the definitions that exist, move to the next occurrence. Similarly, if the first occurrence of the word is not accompanied by a definition at all, move to the next occurrence. For each occurrence, repeat this evaluation: are the authors defining what exactly empathy is? In other words, which parts of the human psyche they consider constituting empathy, and which they consider to reflect other constructs?
Once you have extracted the first fragment (i.e. one or more sentences), repeat your search to see whether the authors provide additional aspects of their definition further on in the introduction. If they do, extract those as well. Extract fragments that occur at different places in the text as separate text elements (e.g. c("first bit", "second bit")).
If the authors do not provide an explicit definition, then they may instead cite another source (e.g. an article or a book) and refer to the definition there as the one they use. In that case, obtain the shortdoi for that source, and extract that, in the full URL form (e.g. “https://doi.org/gf6btx”). This will enable us to later automatically identify all such URLs, and so categorize sources as either providing their own definition, providing no definition, or citing a definition from elsewhere in the literature (as well as compile a list of such references). If they cite a source that does not have a DOI, consult with the EMPATHS-1 coordinators, Jennifer Gutsell and/or Gjalt-Jorn Peters.
If the authors do not define empathy but also do not cite another source as providing the definition they use, extract NA to signify that the definition is missing from the source. Similarly, if authors are not explicit about their definition, extract NA. If authors only provide a definition of empathy in the abstract, report that in the comments field in this clustering entity.
If a source is written in a language that you do not understand, extract “lang” as construct definition. This will allow us to later try to find somebody who can read that language.
Finally, some sources may contain multiple empathy constructs. In that case, extract them into separate entities. To do this, copy the block starting with the line containing “START: empathyConstruct (REPEATING)” and ending with the line containing “END: empathyConstruct (REPEATING)”. Then complete the entities for the second empathy construct, and repeat until you have extracted all different empathy constructs in the source. (An example of such a paper is ns9s; see https://doi.org/ns9s for the PDF and [URL] for the completed Rxs file.)
Extracting a measurement or manipulation instrument
When extracting a measurement or manipulation (entities empathyMeasureId and empathyManipulationId), you specify their unique identifier. This identifier is taken from https://archeologists.opens.science/empathy-measures (from the column marked “identifier”). If the instrument you’re extracting is already in the list, you can just specify the relevant identifier in the extraction script.
However, if it does not yet exist, you have to add it. To do this, visit https://opens.science/apps/elsa, create an identifier, and add it to the first column. Then specify the rest of the information as described in the section “Specifying measurement instruments and/or manipulations” at https://archeologists.opens.science/extraction.
Just like definitions, a study can contain multiple measurement instruments or manipulation instruments. Again, copy the relevant block: from the line with “START: empathyMeasure (REPEATING)” to the line with “END: empathyMeasure (REPEATING)” for multiple measures, and from the line with “START: empathyManipulation (REPEATING)” to the line with “END: empathyManipulation (REPEATING)” for multiple manipulations.
Conversely, a study may not contain any measurement instruments or manipulations. In that case, you can specify “noMeasure” as the value of empathyMeasureId and leave empathyMeasureConstructId as NULL, or specify “noManipulation” as empathyManipulationId and leave empathyManipulationConstructId as NULL.
Extracting multiple studies
Sometimes, a source reports on multiple studies. If the studies use different measurement instruments or manipulation instruments, copy the study block like you may have copied definition blocks, measurement instrument blocks, or manipulation instrument blocks before. However, study blocks are larger, and themselves contain ‘repeating’ container entities (specifically, the measurement instrument blocks and the manipulation instrument blocks are specified within the respective study).
To copy the study block, copy the lines from the line with “START: singleStudyContainer (REPEATING)” to the line with “END: singleStudyContainer (REPEATING)”. As you’ll see, this is quite a large part of the Rxs file. Also note that you may have to specify the population for each study separately.
How to create an identifier
To create a unique identifier for a TOM, TOQ, or TOI, you can either use the R package {psyverse} or the Elsa app. To use Elsa, visit https://opens.science/apps/elsa. Identifiers have the following format. They start with a brief lowercase sequence of letters that is often an acronym or abbreviation of the instrument’s name (e.g. ‘iri’, ‘bespt’, and ‘epitome’). This is followed by a number: the number of items in the measurement instrument; 0 for a manipulation; or 00 for continuous measurement such as EEG. That is followed by the language of the measurement instrument in ISO 639-3 code (see the extraction instructions for extracting the language a source was written in). That is followed by an underscore, and then the last identifier bit as produced by Elsa.
Future reference
In the future, we will specify the extracted measurement instruments and manipulations in an open repository. For that stage a number of instructions are included here. During the extraction phase of this project, you can ignore this; this is simply retained here for future reference.
If it is a questionnaire, you can choose to specify it as a TOQ (“Tabulated Open Questionnaire”) specification, which enables importing it into the questionnaire repository at https://operationalizations.com. This is not yet possible for measurement instruments that do not consist of questions, nor for manipulations; those have to be specified as TOM (“Tabulated Open Metadata”) specifications. Depending on what you choose, follow the corresponding set of instructions below.
Minimal specification of a measurement or manipulation instrument
To specify a TOM (“Tabulated Open Metadata”) specification, you need to complete these steps:
- visit https://archeologists.opens.science/empathy-tabulated-specs
- open “TOM-spec—bespt0eng_7rtpjgf3”
- save a copy under a different name but in the same folder.
- create an identifier prefix (see the procedure below for details) and enter it in cell B3
- visit https://opens.science/apps/elsa, enter the prefix, and create an identifier
- enter the result in cell B4 as UMID
- complete the other fields
- open the spreadsheet at https://archeologists.opens.science/empathy-measures again and add a row with the UMID you just created
Full specification of a questionnaire
To specify a TOQ (“Tabulated Open Questionnaire”) specification, you need to complete these steps:
- visit https://archeologists.opens.science/empathy-tabulated-specs
- open “TOQ-spec—eq60eng_7rs8g3bd”
- save a copy under a different name but in the same folder.
- create an identifier prefix (see the procedure below for details) and enter it in cell B3
- visit https://opens.science/apps/elsa, enter the prefix, and create an identifier
- enter the result in cell B4 as UQID
- complete the other fields
- open the spreadsheet at https://archeologists.opens.science/empathy-measures again and add a row with the UQID you just created
How to create an identifier
To create a unique identifier for a TOM, TOQ, or TOI, you can either use the R package {psyverse} or the Elsa app. To use Elsa, visit https://opens.science/apps/elsa. Identifiers have the following format. They start with a brief lowercase sequence of letters that is often an acronym or abbreviation of the instrument’s name (e.g. ‘iri’, ‘bespt’, and ‘epitome’). This is followed by a number: the number of items in the measurement instrument; 0 for a manipulation; or 00 for continuous measurement such as EEG. That is followed by the language of the measurement instrument in ISO 639-3 code (see the extraction instructions for extracting the language a source was written in). That is followed by an underscore, and then the last identifier bit as produced by Elsa.
Entity overview
cat(rxsSpecObject$entityOverview_list);
Entity overview (list)
This is an overview of the entities to extract, their titles and descriptions, and other details that will become part of the extraction script template that will be used for the actual extraction.
General
General information
Type: Entity Container
Identifier: general
Path in extraction script tree: source > general
Repeating: FALSE
QURID
Quasi-Unique Record Identifier (QURID).
Extraction instructions: This is already available in the tracking sheet; a QURID was added to every record. We will use this to automatically import bibliographic information available in that file, such as title, keywords, potentially abstract, etc.
Type: Extractable Entity
Identifier: qurid
Value description: A single character value that is used as an identifier and so is always mandatory and can only contain a-z, A-Z, 0-9, and underscores, and must start with a letter.
Path in extraction script tree: source > general > qurid
Value template: string_identifier
Repeating: FALSE
Language
The language in which the article is written as ISO 639-3 code (e.g., to list the 10 most spoken languages: “eng” for English, “zho” for Chinese, “hin” for Hindi, “spa” for Spanish, “fra” for French, “ara” for Arabic, “ben” for Bengali, “por” for Portuguese, “rus” for Russian, and “urd” for Urdu).
Extraction instructions: Use ISO 639-3 to extract this (see https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes and https://en.wikipedia.org/wiki/ISO_639-3).
Type: Extractable Entity
Identifier: language
Value description: A single character value
Path in extraction script tree: source > general > language
Value template: string
Repeating: FALSE
Empirical
Whether this source reports on one or more empirical studies (i.e. studies where data were created in the context of the study that authors report on, for example through experimentation, observation, simulation, or similar means).
Extraction instructions: Extract “yes” if this source reports results from at least one empirical study. Extract “no” if it does not report results from an empirical study. Extract “unclear” if you are not sure whether results from an empirical study are reported.
Note that since the species under study can be synthetic, collecting data from generative AI or a simulation also counts as empirical.
Type: Extractable Entity
Identifier: empirical
Value description: A string that has to exactly match one of the values specified in the “values” column of the Coding sheet, and that can be omitted (i.e. is allowed to be NULL).
Path in extraction script tree: source > general > empirical
Value template: categorical_omittable
Repeating: FALSE
Empathy Constructs
This container entity is used to extract information about the various empathy constructs studied in this source.
Type: Entity Container
Identifier: empathyConstructs
Path in extraction script tree: source > empathyConstructs
Repeating: FALSE
Empathy Construct
This clustering entity contains information about one single empathy construct as defined in this source. Note that we take a broad view of empathy constructs; this also includes empathy not as a part of the human psyche, but as it may be perceived to be expressed in, for example, a text or recording.
Type: Extractable Entity List
Identifier: empathyConstruct
Empathy Construct Identifier | This is a unique identifier for this empathy construct. It can be used elsewhere in this extraction script to refer to this construct (for example when extracting measurement instruments or manipulations). |
Empathy Definition | The definition of empathy the authors use. |
Empathy Definition Confidence | How confident you are that the definition you extracted is indeed how the authors defined empathy in this source. |
Empathy Construct Type | The type of empathy construct: psychological construct or not. |
Empathy Definition Notes | Any notes you want to specify. |
Path in extraction script tree: source > empathyConstructs > empathyConstruct
Repeating: TRUE
Methods
This container entity holds entities related to the methods used by the study.
Type: Entity Container
Identifier: methods
Path in extraction script tree: source > methods
Repeating: FALSE
Reported Studies
This container entity holds the studies reported on in this source.
Type: Entity Container
Identifier: reportedStudies
Path in extraction script tree: source > reportedStudies
Repeating: FALSE
Single Study
This container entity contains information about a single study. This is important because some sources report on multiple studies.
Type: Entity Container
Identifier: singleStudyContainer
Path in extraction script tree: source > reportedStudies > singleStudyContainer
Repeating: TRUE
Population
Information about the population of this study.
Type: Entity Container
Identifier: population
Path in extraction script tree: source > reportedStudies > singleStudyContainer > population
Repeating: FALSE
Species
Whether the sample was drawn from humans or non-human populations.
Extraction instructions: Extract “human” if the sample description in the methods section indicates a human sample. Extract “animal” if none of the sample descriptions in the methods sections of the reported studies indicates a human sample. Extract “synthetic” if the data were produced by an automated algorithm (e.g. a simulation such as a large language model or an agent-based model). If another species was studied, extract “other” and then also specify that species in the “population_species_other” entity. If the collected data were produced by multiple species, extract all species as a vector (see the examples).
Type: Extractable Entity
Identifier: population_species
Value description: A vector of strings where each element has to exactly match one of the values specified in the “values” column of the Coding sheet
Path in extraction script tree: source > reportedStudies > singleStudyContainer > population > population_species
Value template: categorical_multi
Repeating: FALSE
Other Species
If the species was specified as “other”, then extract as this entity the text fragment where the authors describe the species they studied.
Extraction instructions: Extract the literal text the authors use; if the species was not extracted as “other”, extract this as NA.
Type: Extractable Entity
Identifier: population_species_other
Value description: A single character value; can be NA or even NULL
Path in extraction script tree: source > reportedStudies > singleStudyContainer > population > population_species_other
Value template: string_omittable
Repeating: FALSE
Manipulation
Whether the source involves a manipulation of empathy (or intervention, behavior change method, therapy component, etc).
Extraction instructions: Assess whether the source introduces or involves a procedure designed to increase, decrease, or otherwise alter the research units’ empathy (i.e. the humans or animals that are studied). This can be called a manipulation in experimental psychology, a behavior change method, technique or principle in behavior change science, or a therapy component in clinical psychology. Other terms are also possible of course: the key is whether the procedure or stimulus was designed to influence empathy. If you conclude that such a procedure or stimulus is described in the source as one of the focal topics, extract “yes”. If you conclude that no such procedure or stimulus is described, extract “no”. If it is unclear whether that is the case, extract “unclear”. If nothing is reported that allows you to draw any conclusions, extract NA (without quotes).
Type: Extractable Entity
Identifier: involvesManipulation
Value description: A string that has to exactly match one of the values specified in the “values” column of the Coding sheet, and that can be omitted (i.e. is allowed to be NULL).
Path in extraction script tree: source > reportedStudies > singleStudyContainer > involvesManipulation
Value template: categorical_omittable
Repeating: FALSE
Empathy Measures
This container entity holds entities specifying how empathy was measured.
Type: Entity Container
Identifier: empathyMeasures
Path in extraction script tree: source > reportedStudies > singleStudyContainer > empathyMeasures
Repeating: FALSE
Empathy Measure
Container entity for this empathy measure.
Type: Extractable Entity List
Identifier: empathyMeasure
Empathy Measure Identifier | The identifier for the empathy measure that was used to measure empathy in this study in this source. |
Measured Construct | The identifier of the construct as entered in its extracted definition above. |
Path in extraction script tree: source > reportedStudies > singleStudyContainer > empathyMeasures > empathyMeasure
Repeating: TRUE
Empathy Manipulations
This container entity holds entities specifying how empathy was manipulated.
Type: Entity Container
Identifier: empathyManipulations
Path in extraction script tree: source > reportedStudies > singleStudyContainer > empathyManipulations
Repeating: FALSE
Empathy Manipulation
Container entity for this empathy manipulation.
Type: Extractable Entity List
Identifier: empathyManipulation
Empathy Manipulation Identifier | The identifier for the empathy manipulation that was used to manipulate empathy in this study in this source. |
Manipulated Construct | The identifier of the construct as entered in its extracted definition above. |
Path in extraction script tree: source > reportedStudies > singleStudyContainer > empathyManipulations > empathyManipulation
Repeating: TRUE
Extraction script template
This is the extraction script generated based on the extraction script specification.
cat("\n\n<pre><textarea rows='40' cols='124' style='font-family:monospace;font-size:11px;white-space:pre;'>",
unlist(rxsSpecObject$rxsTemplate),
"</textarea></pre>\n\n",
sep="\n");
Planning: Screening
(link to corresponding SysRevving chapter)
Example: …
Planning: Search
(link to corresponding SysRevving chapter)
Example: We will search using the Ebsco interface in the PsycINFO and Ebsco E-journals databases, and we will use PubMed (using its own interface).
We will only search in titles, and our conceptual query consists of two main terms (substance synonyms and determinant synonyms), where the first main term is split into a separate set of synonyms for each substance.
In the Ebsco query syntax, the query is:
(TI (((ecstasy OR mdma) OR (coke OR cocaine) OR (GHB) OR (LSD) OR (ketamine OR "special K")) AND (determinants OR factors OR reasons)))
That will be used for the PsycINFO and Ebsco E-Journals databases.
In the PubMed query syntax, the query is:
(((ecstasy[Title] OR mdma[Title]) OR (coke[Title] OR cocaine[Title]) OR (GHB[Title]) OR (LSD[Title]) OR (ketamine[Title] OR "special K"[Title])) AND (determinants[Title] OR factors[Title] OR reasons[Title]))
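Since the first main term is split into a separate synonym set per substance, the query string can also be assembled programmatically, which helps keep the Ebsco and PubMed versions consistent. The following is only an illustration of that structure, not part of the preregistered search procedure.
### Sketch: assemble the Ebsco title-only query from the synonym sets.
substanceSynonyms <- list(
  c("ecstasy", "mdma"),
  c("coke", "cocaine"),
  "GHB",
  "LSD",
  c("ketamine", "\"special K\"")
);
determinantSynonyms <- c("determinants", "factors", "reasons");

substanceTerm <-
  paste0("(",
         paste0("(",
                sapply(substanceSynonyms, paste, collapse = " OR "),
                ")",
                collapse = " OR "),
         ")");
determinantTerm <-
  paste0("(", paste(determinantSynonyms, collapse = " OR "), ")");

ebscoQuery <-
  paste0("(TI (", substanceTerm, " AND ", determinantTerm, "))");

cat(ebscoQuery);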
Preregistration
(link to corresponding SysRevving chapter)
### Note: this chunk doesn't need to be evaluated (i.e. chunk option "eval" is
### set to FALSE), but in case it is, it writes the template to a different
### file than the version with content added and included in the next chunk.
### (For a list of included forms, see data(package='preregr'))
preregr::form_to_rmd_template(
  "genSysRev_v1",
  file = file.path(scriptPath, "preregistration-autogenerated.Rmd"),
  includeYAML = FALSE
);
### Note also that the preregistration form contains a level 2 heading
Inclusive Systematic Review Registration Form
Section: Metadata
Section: Review methods
Empathy plays a pivotal role in people’s socio-emotional well-being. In light of its significance, research on empathy has grown considerably in the last two decades. Yet the existing literature lacks clear construct definitions and agreed-upon measures that capture the multifaceted nature of empathy. There is growing consensus that empathy can be viewed as a broad, overarching term encompassing at least three distinct sub-constructs that represent critical dimensions of empathy: an affective component involving emotions, a cognitive component related to understanding, and the act of sharing experiences. Additionally, a certain degree of self-other differentiation and a motivational component – the desire to promote others’ well-being or alleviate their suffering – are often integral to the empathic experience.
Despite this conceptual framework, the extent to which empirical studies align with this view of empathy and its constituent elements remains unclear. We are planning to conduct a large-scale scoping review to evaluate how empirical research approaches the measurement and manipulation of empathy and its components. Our review aims to address questions regarding which components of empathy receive significant attention and which remain underexplored, as well as how these components are operationalized and measured. Furthermore, this scoping review will culminate in the creation of a publicly accessible database containing machine-readable data, which can serve as a valuable resource for future systematic reviews and meta-analyses.
Subsequent research could use our new database to explore questions such as whether different components of empathy differentially affect various outcome measures. Similarly, investigating the factors that facilitate or impede these empathy components, and their impact on empathy itself, could be a promising future direction stemming from this project.
Section: Search strategy
Section: Screening
Section: Extraction
Section: Synthesis and Quality Assessment
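The preregrObject passed to preregr::prereg_spec_to_pdf() below is presumably created by initializing the form and then specifying content for its items. The commented-out sketch below shows one way this could look using preregr::prereg_initialize() and preregr::prereg_specify(); the item identifier used here (title) is an assumption, so check the form itself for the actual identifiers.
### Sketch (assumptions noted above): initialize the form and fill in items.
# preregrObject <-
#   preregr::prereg_initialize("genSysRev_v1");
#
# preregrObject <-
#   preregr::prereg_specify(
#     preregrObject,
#     title = "EMPATHS-1"   ### 'title' is an assumed item identifier
#   );
#
# ### Printing the object shows which items have been completed so far.
# preregrObject;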
preregr::prereg_spec_to_pdf(
  preregrObject,
  file = file.path(preregPath, "registration-1---preregistration.pdf"),
  author = rmarkdown::metadata$author
);
Example: …
Execution
Execution: Search
(link to corresponding SysRevving chapter)
Example: The queries are entered into the specified interfaces to search the specified databases, separately for each database. The three RIS files are stored using the following filename convention: YYYY-MM-DD_interface_database_originalFileName.ris.
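Before importing, it can be convenient to verify that the stored RIS files actually follow this filename convention, for example with a regular expression. This is a small illustrative check only, not part of the original workflow, and it is deactivated like the other execution chunks.
### Sketch: check RIS filenames against the
### YYYY-MM-DD_interface_database_originalFileName.ris convention.
# risFiles <- list.files(searchPath, pattern = "\\.ris$", recursive = TRUE);
# conventionRegex <- "^\\d{4}-\\d{2}-\\d{2}_[^_]+_[^_]+_.+\\.ris$";
# data.frame(
#   file = risFiles,
#   followsConvention = grepl(conventionRegex, basename(risFiles))
# );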
Importing the search hits
# searchResults <-
# metabefor::import_search_results(
# searchPath,
# dirRegex="2023-02-03"
# );
This is the number of hits we have for each database:
# knitr::kable(
# table(searchResults$bibHitDf$originDatabase),
# col.names = c("Database", "Number of records")
# );
We also see that only a minority of the records have a DOI - at least one that was correctly recognized by synthesisr:
# knitr::kable(
# table(!is.na(searchResults$bibHitDf$doi)),
# col.names = c("DOI present?", "Number of records")
# );
Deduplication
### Temporary deactivation to run script
# infoAboutDuplicates <-
# metabefor::check_duplicate_sources(
# searchResults$bibHitDf
# );
#
# moreInfoAboutDuplicates <-
# attr(infoAboutDuplicates, "duplicateInfo");
#
# duplicateChecking <-
# table(
# moreInfoAboutDuplicates$fullMatch_year &
# moreInfoAboutDuplicates$fullMatch_title &
# moreInfoAboutDuplicates$fullMatch_author
# );
#
# searchResults$bibHitDf$duplicate <-
# ifelse(
# infoAboutDuplicates,
# "duplicate",
# ""
# );
#
# table(searchResults$bibHitDf$duplicate);
Execution: Screening
(link to corresponding SysRevving chapter)
Example: …
Screening stage 1
###-----------------------------------------------------------------------------
### Process first search batch
### Note that these are sorted by batch
###-----------------------------------------------------------------------------
# ### Generate and add quasi-unique record identifiers; note that the origin
# ### *must* be hardcoded to preserve the same QURIDs for every record. The first
# ### record should get "qurid_7mtttgrb".
# searchResults$bibHitDf$qurid <-
# metabefor::generate_qurids(
# nrow(searchResults$bibHitDf),
# origin = as.POSIXct("2023-02-06 15:39:43 CET")
# );
#
# screenerPackages <-
# metabefor::write_screenerPackage(
# bibliographyDf = searchResults,
# outputPath = screeningPath,
# screeners = c("fm2", "il1", "av5"),
# screenerFieldsPrefix = "stage1_",
# basename = "stage1_",
# duplicateField = "duplicate"
# );
### Potentially, to screen with revtools:
# revtools::screen_titles(searchResults$bibHitDf);
# ###-----------------------------------------------------------------------------
# ### Import files
# ###-----------------------------------------------------------------------------
#
# filesToImport <-
# list.files(
# screeningPath,
# recursive = TRUE,
# pattern = "2023-02-28.*bib",
# full.names = TRUE
# );
#
# screenerAcronyms <-
# gsub("^.*stage1_([a-zA-Z0-9]+)\\.bib$",
# "\\1",
# filesToImport);
#
# # screening_stage1_imported_1 <-
# # lapply(
# # filesToImport,
# # bibtex::read.bib
# # );
#
# screening_stage1_imported_2 <-
# lapply(
# filesToImport,
# RefManageR::ReadBib
# );
# names(screening_stage1_imported_2) <- screenerAcronyms;
#
# screening_stage1_imported_2_df <-
# lapply(
# screening_stage1_imported_2,
# as.data.frame
# )
# names(screening_stage1_imported_2_df) <- screenerAcronyms;
#
# ### Fix wrong column
# # screening_stage1_imported_2_df$av5$screener_av5_stage_1 <-
# # screening_stage1_imported_2_df$av5$screener_av5_stage_2;
#
# getScreenerCols <-
# lapply(
# screenerAcronyms,
# function(x) {
# return(
# screening_stage1_imported_2_df[[x]][, c("qurid",
# paste0("screener_", x, "_stage_1"))]);
# }
# );
# names(getScreenerCols) <- screenerAcronyms;
#
# newDf <-
# merge(
# screening_stage1_imported_2_df$fm2,
# getScreenerCols$il1,
# by = "qurid"
# );
# newDf <-
# merge(
# newDf,
# getScreenerCols$av5,
# by = "qurid"
# );
#
# write.csv(newDf,
# file = file.path(screeningPath, "2023-02-28---stage1_merged.csv"));
#
# writexl::write_xlsx(
# newDf,
# file.path(screeningPath, "2023-02-28---stage1_merged.xlsx")
# );
# newDf <-
# as.data.frame(
# readxl::read_xlsx(
# file.path(screeningPath, "2023-02-28---stage1_merged.xlsx")
# )
# );
### Potentially, to screen with revtools:
# revtools::screen_titles(searchResults$bibHitDf);
Execution: Extraction
(link to corresponding SysRevving chapter)
Example: …
# test <-
# metabefor::rxs_parseExtractionScripts(
# path = rxsSpecPath,
# exclude = NULL
# );
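Once extraction scripts have been completed and the chunk above is activated, the structure of the parsed object can be inspected with base R before moving on to synthesis. The sketch below assumes only that test is the object returned by metabefor::rxs_parseExtractionScripts() and makes no assumptions about its internal element names.
### Sketch: inspect the parsed extraction scripts object (base R only).
# str(test, max.level = 1);
# names(test);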
Execution: Synthesis
(link to corresponding SysRevving chapter)
Example: …