Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ jobs:
strategy:
matrix:
es-version:
- 'es:9.1.4'
- 'es:8.15.2'
- 'es:8.14.3'
- 'es:8.13.4'
Expand Down Expand Up @@ -46,7 +47,7 @@ jobs:
- 'os:2.7.0'
- 'os:2.6.0'
env:
mainJob: ${{ matrix.es-version == 'es:8.15.2' }}
mainJob: ${{ matrix.es-version == 'es:9.1.4' }}
sudachiVersion: 20241021
sudachiKind: core
continue-on-error: true
Expand Down
19 changes: 10 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,20 @@ Check [changelog](./CHANGELOG.md) for more.

1. Build analysis-sudachi.
```
$ ./gradlew -PengineVersion=es:8.15.2 build
$ ./gradlew -PengineVersion=es:9.1.4 build
```

Use `-PengineVersion=os:2.18.0` for OpenSearch.

## Supported ElasticSearch versions

1. 8.0.* until 8.15.* supported, integration tests in CI
2. 7.17.* (latest patch version) - supported, integration tests in CI
3. 7.11.* until 7.16.* - best effort support, not tested in CI
4. 7.10.* integration tests for the latest patch version
5. 7.9.* and below - not tested in CI at all, may be broken
6. 7.3.* and below - broken, not supported
1. 9.0.* until 9.1.* - supported, integration tests in CI
2. 8.0.* until 8.15.* - supported, integration tests in CI
3. 7.17.* (latest patch version) - supported, integration tests in CI
4. 7.11.* until 7.16.* - best effort support, not tested in CI
5. 7.10.* integration tests for the latest patch version
6. 7.9.* and below - not tested in CI at all, may be broken
7. 7.3.* and below - broken, not supported

## Supported OpenSearch versions

Expand All @@ -43,11 +44,11 @@ Use `-PengineVersion=os:2.18.0` for OpenSearch.

a. Using the release package
```
$ bin/elasticsearch-plugin install https://github.com/WorksApplications/elasticsearch-sudachi/releases/download/v3.1.1/analysis-sudachi-8.13.4-3.1.1.zip
$ bin/elasticsearch-plugin install https://github.com/WorksApplications/elasticsearch-sudachi/releases/download/v3.1.1/analysis-sudachi-9.1.4-3.1.1.zip
```
b. Using self-build package
```
$ bin/elasticsearch-plugin install file:///path/to/analysis-sudachi-8.13.4-3.1.1.zip
$ bin/elasticsearch-plugin install file:///path/to/analysis-sudachi-9.1.4-3.1.1.zip
```
(Specify the absolute path in URI format)
3. Download sudachi dictionary archive from https://github.com/WorksApplications/SudachiDict
Expand Down
44 changes: 25 additions & 19 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ import org.jetbrains.kotlin.gradle.dsl.JvmTarget

plugins {
id 'java-library'
id 'org.jetbrains.kotlin.jvm' version '1.8.0'
id "org.jetbrains.kotlin.plugin.serialization" version "1.8.0"
id 'com.diffplug.spotless' version '6.16.0'
id 'org.jetbrains.kotlin.jvm' version '2.0.0'
id "org.jetbrains.kotlin.plugin.serialization" version '2.0.0'
id 'com.diffplug.spotless' version '6.25.0'
id 'org.sonarqube' version '4.0.0.2929'
id("org.jetbrains.kotlinx.kover") version "0.7.0"
id 'com.worksap.nlp.sudachi.esc'
Expand All @@ -13,15 +13,20 @@ plugins {
}

group = 'com.worksap.nlp'
archivesBaseName = 'analysis-sudachi'
base.archivesName.set('analysis-sudachi')
version = properties["pluginVersion"]

java {
sourceCompatibility = JavaVersion.VERSION_21
targetCompatibility = JavaVersion.VERSION_21
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

JVM 21 is required by ES9

}

compileKotlin {
compilerOptions.jvmTarget.set(JvmTarget.JVM_11)
compilerOptions.jvmTarget.set(JvmTarget.JVM_21)
}

compileTestKotlin {
compilerOptions.jvmTarget.set(JvmTarget.JVM_11)
compilerOptions.jvmTarget.set(JvmTarget.JVM_21)
}

configurations {
Expand All @@ -39,7 +44,9 @@ sourceSets {
}

dependencies {
spi(project(':spi'))
implementation(project(':spi'))
implementation('com.worksap.nlp:sudachi:0.7.4')
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is required to load the sudachi jar in ES9. We can't use the previous dynamic loading anymore due to entitlement-policy.yaml


testImplementation(project(':testlib'))
testImplementation('org.apache.logging.log4j:log4j-core:2.17.2')
testImplementation('org.jetbrains.kotlin:kotlin-test-junit') {
Expand All @@ -52,13 +59,16 @@ dependencies {

def embedVersion = tasks.register('embedVersion', Copy) {
var esKind = sudachiEs.kind.get()
from 'src/main/extras/plugin-descriptor.properties'
from('src/main/extras/plugin-descriptor.properties') {
expand([
version: version,
engineVersion: esKind.version,
engineKind: esKind.engine.kind
])
}
// Include entitlement policy for Elasticsearch 9+
from('src/main/extras/entitlement-policy.yaml')
into "build/package/${version}/${esKind.engine.kind}-${esKind.version}"
expand([
version: version,
engineVersion: esKind.version,
engineKind: esKind.engine.kind
])
inputs.property("version", version)
inputs.property("elasticSearchVersion", esKind.version)
}
Expand All @@ -74,17 +84,13 @@ def packageJars = tasks.register('packageJars', Copy) {
def packageSpiJars = tasks.register('packageSpiJars', Copy) {
from configurations.spi
var esKind = sudachiEs.kind.get()
if (sudachiEs.hasPluginSpiSupport()) {
into "build/package/${version}/${esKind.engine.kind}-${esKind.version}/spi"
} else {
into "build/package/${version}/${esKind.engine.kind}-${esKind.version}"
}
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See above, we can't do dynamic loading due to entitlement-policy.yaml--at least I couldn't get it to work.

into "build/package/${version}/${esKind.engine.kind}-${esKind.version}"
}

def distZip = tasks.register('distZip', Zip) {
var esKind = sudachiEs.kind.get()
dependsOn embedVersion, packageJars, packageSpiJars
archiveBaseName.set("${esKind.engine.kind}-${esKind.version}-$archivesBaseName")
archiveBaseName.set("${esKind.engine.kind}-${esKind.version}-${base.archivesName.get()}")
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a deprecation fix in gradle

from("build/package/${version}/${esKind.engine.kind}-${esKind.version}", 'LICENSE', 'README.md')
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class EsConventions implements Plugin<Project> {
@Override
void apply(Project target) {
target.tasks.withType(JavaCompile).configureEach {
options.release.set(11)
options.release.set(21)
options.encoding = 'UTF-8'
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,18 @@ import java.util.zip.ZipFile

class EsTestEnvExtension {
Path bundlePath = null
Path systemDic = null
Path configFile = null
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are now handled by addConfigFile (see below)

List<Path> additionalJars = new ArrayList<>()
List<PluginDescriptor> additionalPlugins = new ArrayList<>()
List<ConfigFileDescriptor> configFiles = new ArrayList<>()

/**
 * Registers an extra plugin for the test environment.
 *
 * @param name name stored on the descriptor
 * @param value object stored on the descriptor as the plugin's value
 */
void addPlugin(String name, Object value) {
    def descriptor = new PluginDescriptor(name: name, value: value)
    additionalPlugins << descriptor
}

/**
 * Registers a file to be copied into the test environment's sudachi config directory.
 *
 * @param sourcePath file to copy
 * @param targetName name to give the copy; when null or empty, the source file's own name is used
 */
void addConfigFile(Path sourcePath, String targetName = null) {
    String destinationName = targetName
    // Groovy truth: both null and "" fall back to the source file name.
    if (!destinationName) {
        destinationName = sourcePath.fileName.toString()
    }
    configFiles << new ConfigFileDescriptor(source: sourcePath, target: destinationName)
}
}

class PluginDescriptor {
Expand All @@ -49,6 +53,11 @@ class PluginDescriptor {
}
}

/**
 * Describes one file that is copied into the sudachi config directory
 * of the test environment.
 */
class ConfigFileDescriptor {
// File to copy from.
Path source
// Name the copy is resolved against inside the sudachi config directory.
String target
}

class StringProvider implements Provider<String>, Serializable {
private static final long serialVersionUID = 42L
String value
Expand Down Expand Up @@ -103,6 +112,10 @@ class StringProvider implements Provider<String>, Serializable {
throw new IllegalStateException("not implemented")
}

/**
 * Not supported by this provider; every call fails.
 *
 * @param spec ignored
 * @throws IllegalStateException always
 */
@Override
Provider<String> filter(org.gradle.api.specs.Spec<? super String> spec) {
throw new IllegalStateException("not implemented")
}

@Override
String toString() {
Expand Down Expand Up @@ -153,9 +166,10 @@ class EsTestEnvPlugin implements Plugin<Project> {
target.gradle.taskGraph.whenReady {
boolean shouldRun = false
if (target.plugins.findPlugin(EsSudachiPlugin.class) != null) {
shouldRun = shouldTestsRun(target.extensions.getByType(EsExtension).kind.get())
def kind = target.extensions.getByType(EsExtension).kind.get()
shouldRun = shouldTestsRun(kind)
}
target.tasks.findAll().forEach { Task task ->
target.tasks.withType(Test).forEach { Test task ->
task.onlyIf { shouldRun }
}
}
Expand Down Expand Up @@ -202,8 +216,9 @@ class EsTestEnvPlugin implements Plugin<Project> {

def sudachiConfigDir = configPath.resolve("sudachi")
Files.createDirectories(sudachiConfigDir)
Files.copy(ext.systemDic, sudachiConfigDir.resolve("system_core.dic"))
Files.copy(ext.configFile, sudachiConfigDir.resolve("sudachi.json"))
for (ConfigFileDescriptor config in ext.configFiles) {
Files.copy(config.source, sudachiConfigDir.resolve(config.target))
}

return rootPath
}
Expand Down
3 changes: 3 additions & 0 deletions buildSrc/src/main/groovy/com/worksap/nlp/tools/engines.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ enum EsSupport implements EngineSupport {
Es84("es-8.04"),
Es810("es-8.10"),
Es812("es-8.12"),
Es90("es-9.00"),

String tag
List<String> keys
Expand Down Expand Up @@ -41,6 +42,8 @@ enum EsSupport implements EngineSupport {
return Es810
} else if (vers.ge(8, 12) && vers.lt(9, 0)) {
return Es812
} else if (vers.ge(9, 0)) {
return Es90
} else {
throw new IllegalArgumentException("unsupported ElasticSearch version: " + vers.raw)
}
Expand Down
6 changes: 3 additions & 3 deletions gradle.properties
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# elasticsearch versions: 8.15.2, 8.14.3, 8.13.4, 8.12.2, 8.11.4, 8.10.4, 8.9.2,
# elasticsearch versions: 9.1.4, 8.15.2, 8.14.3, 8.13.4, 8.12.2, 8.11.4, 8.10.4, 8.9.2,
# 8.8.1, 8.6.2, 8.5.3, 8.4.3, 8.2.3, 7.17.24, 7.14.2, 7.10.2
# opensearch version: 2.18.0, 2.17.1, 2.16.0, 2.15.0, 2.14.0, 2.13.0, 2.12.0, 2.11.1,
# opensearch version: 2.18.0, 2.17.1, 2.16.0, 2.15.0, 2.14.0, 2.13.0, 2.12.0, 2.11.1,
# 2.10.0, 2.9.0, 2.8.0, 2.7.0, 2.6.0
engineVersion=es:8.15.2
engineVersion=es:9.1.4
org.gradle.jvmargs=-XX:MaxMetaspaceSize=350m \
--add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
--add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \
Expand Down
2 changes: 1 addition & 1 deletion gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.1.1-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-8.11-bin.zip
networkTimeout=10000
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
38 changes: 28 additions & 10 deletions integration/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,33 @@ plugins {

version = properties["pluginVersion"]

java {
sourceCompatibility = JavaVersion.VERSION_21
targetCompatibility = JavaVersion.VERSION_21
}

compileKotlin {
compilerOptions.jvmTarget.set(JvmTarget.JVM_11)
compilerOptions.jvmTarget.set(JvmTarget.JVM_21)
}

compileTestKotlin {
compilerOptions.jvmTarget.set(JvmTarget.JVM_11)
compilerOptions.jvmTarget.set(JvmTarget.JVM_21)
}

configurations { buildSudachiDict }

dependencies {
buildSudachiDict (project(':spi'))
compileOnly(project(':'))
compileOnly(project(':spi'))
testCompileOnly(project(':testlib'))
testCompileOnly(project(':subplugin'))
testImplementation(project(':'))
testImplementation(project(':spi'))
testImplementation(project(':testlib'))
testImplementation(project(':subplugin'))
// Add ICU plugin JARs for ES 9.x dynamic loading
testRuntimeOnly files({
fileTree(dir: new File(project.buildDir, "cache/icu-plugin-extracted"), include: '**/*.jar')
}) {
builtBy 'extractIcuPlugin'
}
testImplementation('junit:junit:4.13.1') {
transitive = false
}
Expand Down Expand Up @@ -65,19 +76,25 @@ def downloadIcuPlugin = tasks.register('downloadIcuPlugin', Download.class) {
overwrite(false)
}

// Unpacks the downloaded ICU plugin archive into build/cache/icu-plugin-extracted.
def extractIcuPlugin = tasks.register('extractIcuPlugin', Copy) {
    // The archive must be downloaded before it can be unpacked.
    dependsOn downloadIcuPlugin
    from zipTree(downloadIcuPlugin.get().dest)
    // layout.buildDirectory replaces the deprecated Project.buildDir accessor;
    // it resolves to the same directory.
    into layout.buildDirectory.dir("cache/icu-plugin-extracted")
}

esTestEnv {
def esKind = sudachiEs.kind.get()
def packageDir = rootDir.toPath().resolve("build/package/${version}/${esKind.engine.kind}-${esKind.version}")
bundlePath = packageDir
systemDic = compileSystemDictionary.get().outputs.files.singleFile.toPath()
configFile = rootProject.rootDir.toPath().resolve("src/test/resources/com/worksap/nlp/lucene/sudachi/ja/sudachi.json")
additionalJars.add(project(":testlib").getTasksByName('jar', false).first().outputs.files.singleFile.toPath())
addPlugin("analysis-icu", downloadIcuPlugin)
addPlugin('sudachi-sub', project(':subplugin').getTasksByName('distZip', false).first())
addConfigFile(compileSystemDictionary.get().outputs.files.singleFile.toPath(), "system_core.dic")
addConfigFile(rootProject.rootDir.toPath().resolve("src/test/resources/com/worksap/nlp/lucene/sudachi/ja/sudachi.json"), "sudachi.json")
addConfigFile(rootProject.rootDir.toPath().resolve("src/test/resources/com/worksap/nlp/lucene/sudachi/ja/sudachi_subplugin.json"))
}

test {
onlyIf { ! (sudachiEs.isEs() && sudachiEs.kind.get().parsedVersion().ge(8, 9)) }
dependsOn(
':packageJars',
':packageSpiJars',
Expand All @@ -86,6 +103,7 @@ test {
compileSystemDictionary,
':testlib:jar',
downloadIcuPlugin,
extractIcuPlugin,
':subplugin:distZip'
)
systemProperty("tests.security.manager", true)
Expand All @@ -94,7 +112,7 @@ test {

def distZip = tasks.register('distZip', Zip) {
var esKind = sudachiEs.kind.get()
archiveBaseName.set("${esKind.engine.kind}-${esKind.version}-$archivesBaseName")
archiveBaseName.set("${esKind.engine.kind}-${esKind.version}-${project.name}")
from(
project(':subplugin').packageJars.outputs.files,
project(':subplugin').embedVersion.outputs.files,
Expand Down
Loading