Skip to content

Commit

Permalink
Add material science domain (#597)
Browse files Browse the repository at this point in the history
  • Loading branch information
TungstnBallon committed Jun 30, 2024
1 parent f4db0fe commit 9c5e5bd
Show file tree
Hide file tree
Showing 2 changed files with 153 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// SPDX-FileCopyrightText: 2024 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

/**
 * DOI clean-up step: deletes the text `dx.doi.org/` from a value.
 *
 * The pattern carries no `^` anchor, so the match is not restricted to the start of the value.
 * The surrounding `\b` assertions require a word boundary before `dx` and directly after the
 * trailing slash (i.e. the slash has to be followed by a word character).
 */
publish transform RemoveDxDotDoiDotOrg {
  from rawDoi oftype text;
  to strippedDoi oftype text;

  strippedDoi: rawDoi replace /\b(dx\.doi\.org\/)\b/ with "";
}

/**
 * DOI clean-up step: deletes a leading `http://dx.doi.org/` from a value.
 *
 * The pattern is anchored with `^`, so only a match at the very start of the value is removed.
 * The closing `\b` requires the slash to be followed by a word character.
 */
publish transform RemoveHttpDxDotDoiDotOrg {
  from rawDoi oftype text;
  to strippedDoi oftype text;

  strippedDoi: rawDoi replace /^http:\/\/dx\.doi\.org\/\b/ with "";
}

/**
 * DOI clean-up step: deletes the scheme text `http://` from a value.
 *
 * The pattern has no `^` anchor; it only requires a word boundary in front of `http`,
 * so the match is not restricted to the start of the value.
 */
publish transform RemoveHttp {
  from rawDoi oftype text;
  to strippedDoi oftype text;

  strippedDoi: rawDoi replace /(\bhttp:\/\/)/ with "";
}

/**
 * DOI clean-up step: deletes a leading `https://doi.org/` from a value.
 *
 * The pattern is anchored with `^`, so only a match at the very start of the value is removed.
 * The closing `\b` requires the slash to be followed by a word character.
 */
publish transform RemoveHttpsDoiDotOrg {
  from rawDoi oftype text;
  to strippedDoi oftype text;

  strippedDoi: rawDoi replace /^https:\/\/doi\.org\/\b/ with "";
}

/**
 * Normalises a table column of [DOI references](https://www.doi.org/the-identifier/what-is-a-doi/)
 * by stripping common URL-style prefixes.
 *
 * Properties:
 * - doiColumn: name of the column that holds the DOI references. Every step reads from and
 *   writes back to this same column, so its original content is overwritten!
 *
 * The column is passed through four `TableTransformer` steps, in this order:
 * 1. `RemoveDxDotDoiDotOrg`     - drops `dx.doi.org/`
 * 2. `RemoveHttpDxDotDoiDotOrg` - drops a leading `http://dx.doi.org/`
 * 3. `RemoveHttp`               - drops `http://`
 * 4. `RemoveHttpsDoiDotOrg`     - drops a leading `https://doi.org/`
 *
 * Examples:
 * - "https://doi.org/10.1111/example.1234" becomes "10.1111/example.1234"
 * - "dx.doi.org/10.0000/456" becomes "10.0000/456"
 * - "http://10.1015/23" becomes "10.1015/23"
 *
 * NOTE(review): prefixes outside this list (for instance `https://dx.doi.org/` or a bare
 * `doi.org/`) do not appear to be fully removed by these four steps - confirm whether
 * that is intended.
 */
publish composite blocktype DOIStandardizer {
  property doiColumn oftype text;

  input UnstandardizedDOI oftype Table;
  output StandardizedDOI oftype Table;

  UnstandardizedDOI
    -> StripDxDoiOrg -> StripHttpDxDoiOrg
    -> StripHttp -> StripHttpsDoiOrg
    -> StandardizedDOI;

  // Step 1: remove "dx.doi.org/".
  block StripDxDoiOrg oftype TableTransformer {
    inputColumns: [doiColumn];
    outputColumn: doiColumn;
    uses: RemoveDxDotDoiDotOrg;
  }

  // Step 2: remove a leading "http://dx.doi.org/".
  block StripHttpDxDoiOrg oftype TableTransformer {
    inputColumns: [doiColumn];
    outputColumn: doiColumn;
    uses: RemoveHttpDxDotDoiDotOrg;
  }

  // Step 3: remove "http://".
  block StripHttp oftype TableTransformer {
    inputColumns: [doiColumn];
    outputColumn: doiColumn;
    uses: RemoveHttp;
  }

  // Step 4: remove a leading "https://doi.org/".
  block StripHttpsDoiOrg oftype TableTransformer {
    inputColumns: [doiColumn];
    outputColumn: doiColumn;
    uses: RemoveHttpsDoiDotOrg;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// SPDX-FileCopyrightText: 2024 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

// The pattern is anchored with `^` and `$` so that the *entire* value has to be a bare DOI.
// Without the anchors any text that merely contains a DOI-like substring (for example a value
// that still carries its "https://doi.org/" prefix) would satisfy the constraint.
// The registrant code after "10." may be 4 to 9 digits long; the suffix after the slash is any
// run of non-whitespace characters.
constraint DOIFormat on text: value matches /^10\.\d{4,9}\/\S+$/;
/**
* A bare DOI such as `10.1007/xxxx`: the directory indicator `10.`, a 4-9 digit registrant code,
* a slash and a non-empty suffix without whitespace. The whole value must have this shape.
* The `DOIStandardizer` block can remove common prefixes to this pattern, with the result matching this format.
*/
publish valuetype DOI oftype text {
  constraints: [
    DOIFormat
  ];
}

// Anchored with `^` and `$` so that the whole value must be exactly four digits, a dash, two
// digits, a dash and two digits. Unanchored, values such as "x2024-01-01y" or "12024-01-011"
// would also be accepted because they contain a matching substring.
// Only the shape is checked; month and day ranges are not validated.
constraint DateFormatYYYYMMDDRegex on text: value matches /^\d{4}-\d{2}-\d{2}$/;
/** A date written as YYYY-MM-DD (format check only, e.g. "2024-06-30"). */
publish valuetype DateYYYYMMDD oftype text {
  constraints: [
    DateFormatYYYYMMDDRegex
  ];
}

// The value must consist solely of one or more unit terms. Each term is an SI base unit name
// followed by a (possibly negative, possibly fractional) exponent in `^( )`, and each term is
// terminated by whitespace or by the end of the value.
// The previous pattern wrapped everything in an optional `( ... )*` group between two `\b`
// assertions, so it accepted any text containing a word character, and its `\|` matched a
// literal pipe instead of acting as an alternation between positive and negative exponents.
// Both spellings "Metre" and "Meter" are accepted, because the sibling length constraint
// uses "Meter".
constraint SiUnitConstraint on text: value matches /^((Second|Metre|Meter|Kilogram|Ampere|Kelvin|Mole|Candela)\^\(-?\d+(\.\d+)?\)(\s+|$))+$/;
/**
* Constrains the unit column to SI base units with exponents, like "Second^(1.0)", "Ampere^(1.0)"
* or "Candela^(1.0)". Several terms may be combined, separated by whitespace,
* e.g. "Kilogram^(1.0) Metre^(-3.0)".
*/
publish valuetype SiUnit oftype text {
  constraints: [
    SiUnitConstraint
  ];
}

// The whole value must be `Pascal^(x)`, where x is a possibly negative, possibly fractional
// number; trailing whitespace is tolerated.
// The previous pattern wrapped everything in an optional `( ... )*` group between two `\b`
// assertions, so it accepted any text containing a word character, and its `\|` matched a
// literal pipe instead of acting as an alternation between positive and negative exponents.
constraint PressureUnitPascalConstraint on text: value matches /^Pascal\^\(-?\d+(\.\d+)?\)\s*$/;
/** Constrains Pressure units to be Pascal^(x), e.g. "Pascal^(1.0)". */
publish valuetype PressureUnitPascal oftype text {
  constraints: [
    PressureUnitPascalConstraint
  ];
}

// The whole value must be `Meter^(x)` (or the spelling `Metre^(x)`), where x is a possibly
// negative, possibly fractional number; trailing whitespace is tolerated.
// The previous pattern wrapped everything in an optional `( ... )*` group between two `\b`
// assertions, so it accepted any text containing a word character, and its `\|` matched a
// literal pipe instead of acting as an alternation between positive and negative exponents.
// "Metre" is accepted as well because the sibling SI unit constraint spells the unit that way.
constraint LengthUnitMeterConstraint on text: value matches /^Met(er|re)\^\(-?\d+(\.\d+)?\)\s*$/;
/** Constrains Length units to be Meter^(x) (spelling "Metre^(x)" is accepted too), e.g. "Meter^(1.0)". */
publish valuetype LengthUnitMeter oftype text {
  constraints: [
    LengthUnitMeterConstraint
  ];
}


// The whole value must be `Kelvin^(x)`, where x is a possibly negative, possibly fractional
// number; trailing whitespace is tolerated.
// The previous pattern wrapped everything in an optional `( ... )*` group between two `\b`
// assertions, so it accepted any text containing a word character, and its `\|` matched a
// literal pipe instead of acting as an alternation between positive and negative exponents.
constraint TemperatureUnitKelvinConstraint on text: value matches /^Kelvin\^\(-?\d+(\.\d+)?\)\s*$/;
/** Constrains Temperature units to be Kelvin^(x), e.g. "Kelvin^(1.0)". */
publish valuetype TemperatureUnitKelvin oftype text {
  constraints: [
    TemperatureUnitKelvinConstraint
  ];
}

0 comments on commit 9c5e5bd

Please sign in to comment.