Skip to content

Commit

Permalink
Merge embulk-guess-json
Browse files Browse the repository at this point in the history
  • Loading branch information
dmikurube committed Jul 28, 2023
2 parents 33d8aa7 + ab0d310 commit b76a78b
Show file tree
Hide file tree
Showing 5 changed files with 280 additions and 1 deletion.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# embulk-parser-json
# embulk-parser-json / embulk-guess-json

This is one of Embulk's "standard" plugins that are embedded in Embulk's executable binary distributions.

Expand Down
163 changes: 163 additions & 0 deletions embulk-guess-json/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
// Gradle plugins applied to this subproject: Java compilation, Maven publication,
// artifact signing, Embulk plugin packaging, and Checkstyle.
plugins {
id "java"
id "maven-publish"
id "signing"
// NOTE(review): presumably the plugin that supplies the embulkPlugin { } extension configured further down — confirm.
id "org.embulk.embulk-plugins" version "0.5.5"
id "checkstyle"
}

// Dependencies are resolved from Maven Central only.
repositories {
mavenCentral()
}

// Maven coordinates and description of this subproject. The version string is also inspected
// for a "-SNAPSHOT" suffix by the publishing configuration to choose the target repository URL.
group = "org.embulk"
version = "0.11.0-SNAPSHOT"
description = "Guesses if an input is in JSON."

// Turn on Gradle dependency locking for the compile and runtime classpaths.
// NOTE(review): the locked versions are presumably the ones recorded in this subproject's gradle.lockfile — confirm.
configurations {
compileClasspath.resolutionStrategy.activateDependencyLocking()
runtimeClasspath.resolutionStrategy.activateDependencyLocking()
}

// Applies to every Java compilation task: report deprecation and unchecked warnings,
// and read source files as UTF-8.
tasks.withType(JavaCompile) {
options.compilerArgs << "-Xlint:deprecation" << "-Xlint:unchecked"
options.encoding = "UTF-8"
}

// Java toolchain and auxiliary artifacts.
java {
// Build with a Java 8 toolchain.
toolchain {
languageVersion = JavaLanguageVersion.of(8)
}

// Also produce the -javadoc and -sources JARs; the publishing section relies on these.
withJavadocJar()
withSourcesJar()
}

// Library dependencies of the guess plugin.
dependencies {
// Needed on the compile classpath only.
// NOTE(review): presumably supplied by the Embulk runtime when the plugin is loaded — confirm.
compileOnly "org.embulk:embulk-spi:0.11"
compileOnly "org.msgpack:msgpack-core:0.8.24"

// Needed both at compile time and at run time.
implementation "org.embulk:embulk-util-config:0.3.4"
implementation "org.embulk:embulk-util-file:0.1.5"
implementation "org.embulk:embulk-util-json:0.2.2"
}

// Embulk plugin metadata: the implementation class, the plugin category ("guess"),
// and the plugin type name ("json").
embulkPlugin {
mainClass = "org.embulk.guess.json.JsonGuessPlugin"
category = "guess"
type = "json"
}

// Ship the root project's LICENSE file under META-INF in each produced JAR:
// the main JAR, the sources JAR, and the Javadoc JAR.
jar {
metaInf {
from rootProject.file("LICENSE")
}
}

// Same LICENSE inclusion for the sources JAR.
sourcesJar {
metaInf {
from rootProject.file("LICENSE")
}
}

// Same LICENSE inclusion for the Javadoc JAR.
javadocJar {
metaInf {
from rootProject.file("LICENSE")
}
}

// Maven publication of this subproject, and the Sonatype OSSRH repository it is uploaded to.
publishing {
    publications {
        maven(MavenPublication) {
            groupId = project.group
            artifactId = project.name

            from components.java
            // javadocJar and sourcesJar are added by java.withJavadocJar() and java.withSourcesJar() above.
            // See: https://docs.gradle.org/current/javadoc/org/gradle/api/plugins/JavaPluginExtension.html

            pom {  // https://central.sonatype.org/pages/requirements.html
                packaging "jar"

                name = project.name
                description = project.description
                url = "https://www.embulk.org/"

                licenses {
                    license {
                        // http://central.sonatype.org/pages/requirements.html#license-information
                        name = "The Apache License, Version 2.0"
                        url = "https://www.apache.org/licenses/LICENSE-2.0.txt"
                        distribution = "repo"
                    }
                }

                developers {
                    developer {
                        name = "Dai MIKURUBE"
                        // NOTE(review): this address looks redacted by e-mail obfuscation in this copy of the
                        // script; restore the real address before publishing.
                        email = "[email protected]"
                    }
                }

                scm {
                    connection = "scm:git:git://github.com/embulk/embulk-guess-json.git"
                    // SSH form of the repository URL, used for developer (write) access.
                    developerConnection = "scm:git:git@github.com:embulk/embulk-guess-json.git"
                    url = "https://github.com/embulk/embulk-guess-json"
                }
            }
        }
    }

    repositories {
        maven {  // publishMavenPublicationToMavenCentralRepository
            name = "mavenCentral"
            // Snapshot versions go to the snapshots repository; everything else goes to staging.
            if (project.version.endsWith("-SNAPSHOT")) {
                url "https://oss.sonatype.org/content/repositories/snapshots"
            } else {
                url "https://oss.sonatype.org/service/local/staging/deploy/maven2"
            }

            // Credentials come from the ossrhUsername / ossrhPassword project properties;
            // empty strings are used when they are not set.
            credentials {
                username = project.hasProperty("ossrhUsername") ? ossrhUsername : ""
                password = project.hasProperty("ossrhPassword") ? ossrhPassword : ""
            }
        }
    }
}

// Sign the Maven publication. When both the signingKey and signingPassword project properties
// are present, an in-memory PGP key is used; otherwise no in-memory key is configured here.
signing {
if (project.hasProperty("signingKey") && project.hasProperty("signingPassword")) {
logger.lifecycle("Signing with an in-memory key.")
useInMemoryPgpKeys(signingKey, signingPassword)
}
sign publishing.publications.maven
}

// Test task: always re-run the tests, and log every event with full exception details.
test {
    // Never consider the test task up-to-date, so tests are executed on every build.
    outputs.upToDateWhen { false }

    testLogging {
        events "passed", "skipped", "failed", "standardOut", "standardError"
        exceptionFormat = org.gradle.api.tasks.testing.logging.TestExceptionFormat.FULL
        showCauses = true
        showExceptions = true
        showStackTraces = true
        showStandardStreams = true
    }
}

// Applies to every Checkstyle task.
tasks.withType(Checkstyle) {
    // Not to skip up-to-date checkstyles.
    outputs.upToDateWhen { false }
}

// Checkstyle settings. The rule file and its supporting directory live under the root
// project's config/checkstyle; any violation or warning fails the build.
checkstyle {
toolVersion "8.7"
configFile = file("${rootProject.projectDir}/config/checkstyle/checkstyle.xml")
// Exposes the configuration directory to the rule file as the "checkstyle.config.path" property.
configProperties = [
"checkstyle.config.path": file("${rootProject.projectDir}/config/checkstyle")
]
ignoreFailures = false
maxWarnings = 0 // https://github.com/gradle/gradle/issues/881
}
15 changes: 15 additions & 0 deletions embulk-guess-json/gradle.lockfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# This is a Gradle generated file for dependency locking.
# Manual edits can break the build and are not advised.
# This file is expected to be part of source control.
com.fasterxml.jackson.core:jackson-annotations:2.6.7=compileClasspath,runtimeClasspath
com.fasterxml.jackson.core:jackson-core:2.6.7=compileClasspath,runtimeClasspath
com.fasterxml.jackson.core:jackson-databind:2.6.7.5=compileClasspath,runtimeClasspath
com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.6.7=compileClasspath,runtimeClasspath
javax.validation:validation-api:1.1.0.Final=compileClasspath,runtimeClasspath
org.embulk:embulk-spi:0.11=compileClasspath
org.embulk:embulk-util-config:0.3.4=compileClasspath,runtimeClasspath
org.embulk:embulk-util-file:0.1.5=compileClasspath,runtimeClasspath
org.embulk:embulk-util-json:0.2.2=compileClasspath,runtimeClasspath
org.msgpack:msgpack-core:0.8.24=compileClasspath
org.slf4j:slf4j-api:2.0.7=compileClasspath
empty=
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Copyright 2021 The Embulk project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.embulk.guess.json;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import org.embulk.config.ConfigDiff;
import org.embulk.config.ConfigSource;
import org.embulk.spi.Buffer;
import org.embulk.spi.BufferAllocator;
import org.embulk.spi.Exec;
import org.embulk.spi.GuessPlugin;
import org.embulk.util.config.ConfigMapperFactory;
import org.embulk.util.file.FileInputInputStream;
import org.embulk.util.file.InputStreamFileInput;
import org.embulk.util.json.JsonParseException;
import org.embulk.util.json.JsonParser;
import org.msgpack.value.Value;

/**
 * Guess plugin that proposes {@code parser: {type: json}} when the sample data can be read
 * as a sequence of JSON objects.
 */
public class JsonGuessPlugin implements GuessPlugin {
    /**
     * Guesses whether the sample is JSON.
     *
     * <p>If the given configuration already names a parser type other than {@code "json"},
     * nothing is guessed. Otherwise, the sample is parsed value by value, and the parser type
     * {@code "json"} is proposed when at least one JSON object is read before the input ends,
     * a non-object value appears, or parsing fails.
     *
     * @param config  the configuration guessed so far; only {@code parser.type} is consulted
     * @param sample  the sample bytes to examine
     * @return an empty {@link ConfigDiff}, or one that sets {@code parser.type} to {@code "json"}
     * @throws UncheckedIOException  if reading or closing the sample stream fails
     */
    @Override
    public ConfigDiff guess(final ConfigSource config, final Buffer sample) {
        final ConfigDiff configDiff = CONFIG_MAPPER_FACTORY.newConfigDiff();

        if (!"json".equals(config.getNestedOrGetEmpty("parser").get(String.class, "type", "json"))) {
            return configDiff;
        }

        final BufferAllocator bufferAllocator = Exec.getBufferAllocator();

        // Use org.embulk.util.json.JsonParser to respond to multi-line JSON.
        final JsonParser.Stream opened = newJsonParser(sample, bufferAllocator);

        boolean oneJsonParsed = false;
        // try-with-resources so that the parser stream is closed on every path.
        try (final JsonParser.Stream jsonParser = opened) {
            Value v;
            while ((v = jsonParser.next()) != null) {
                // "v" needs to be JSON object type (isMapValue) because:
                // 1) Single-column CSV can be mis-guessed as JSON if JSON non-objects are accepted.
                // 2) JsonParserPlugin accepts only the JSON object type.
                if (!v.isMapValue()) {
                    // Stop at the first non-object; objects read before it still count.
                    break;
                }
                oneJsonParsed = true;
            }
        } catch (final JsonParseException ex) {
            // Deliberately ignored: a parse failure only ends the scan. Objects that were read
            // before the failure still count toward the guess.
        } catch (final IOException ex) {
            throw new UncheckedIOException(ex);
        }

        if (oneJsonParsed) {
            // if JsonParser can parse even one JSON data
            final ConfigDiff typeJson = CONFIG_MAPPER_FACTORY.newConfigDiff();
            typeJson.set("type", "json");
            configDiff.set("parser", typeJson);
        }

        return configDiff;
    }

    /**
     * Opens a JSON value stream over the contents of the given buffer.
     *
     * @param buffer  the buffer that holds the sample bytes
     * @param bufferAllocator  the allocator handed to the intermediate {@link InputStreamFileInput}
     * @return a stream of parsed JSON values
     * @throws UncheckedIOException  if the parser cannot be opened
     */
    private static JsonParser.Stream newJsonParser(final Buffer buffer, final BufferAllocator bufferAllocator) {
        final ArrayList<InputStream> inputStreams = new ArrayList<>();
        inputStreams.add(buildByteArrayInputStream(buffer));

        final InputStreamFileInput.IteratorProvider iteratorProvider = new InputStreamFileInput.IteratorProvider(inputStreams);

        final FileInputInputStream input = new FileInputInputStream(new InputStreamFileInput(bufferAllocator, iteratorProvider));
        // Move to the first (and only) "file" before handing the stream to the parser.
        input.nextFile();
        try {
            return (new JsonParser()).open(input);
        } catch (final IOException ex) {
            throw new UncheckedIOException(ex);
        }
    }

    /**
     * Wraps the valid region of the buffer in a {@link ByteArrayInputStream}.
     *
     * <p>Only the bytes from {@code offset()} for {@code limit()} bytes are exposed, so that
     * bytes of the backing array outside the buffer's filled region are never parsed.
     */
    @SuppressWarnings("deprecation")  // For the use of Buffer#array.
    private static ByteArrayInputStream buildByteArrayInputStream(final Buffer buffer) {
        return new ByteArrayInputStream(buffer.array(), buffer.offset(), buffer.limit());
    }

    private static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ConfigMapperFactory.builder().addDefaultModules().build();
}
1 change: 1 addition & 0 deletions settings.gradle
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
// The root project keeps the name "embulk-parser-json"; the guess plugin is built as the
// "embulk-guess-json" subproject of the same build.
rootProject.name = "embulk-parser-json"
include "embulk-guess-json"

0 comments on commit b76a78b

Please sign in to comment.