1 change: 1 addition & 0 deletions .gitignore
@@ -26,6 +26,7 @@ hs_err_pid*
# Ignore java-version and idea files.
.java-version
.idea
.vscode

# Ignore Gradle project-specific cache directory
.gradle
13 changes: 13 additions & 0 deletions pom.xml
@@ -88,6 +88,7 @@
<spark.version.prefix>3.4</spark.version.prefix>
<iceberg.version>1.4.2</iceberg.version>
<delta.version>2.4.0</delta.version>
<paimon.version>1.2.0</paimon.version>
<jackson.version>2.18.2</jackson.version>
<spotless.version>2.43.0</spotless.version>
<apache.rat.version>0.16.1</apache.rat.version>
@@ -333,6 +334,18 @@
<version>${delta.hive.version}</version>
</dependency>

<!-- Paimon -->
<dependency>
<groupId>org.apache.paimon</groupId>
<artifactId>paimon-bundle</artifactId>
<version>${paimon.version}</version>
</dependency>
<dependency>
<groupId>org.apache.paimon</groupId>
<artifactId>paimon-spark-${spark.version.prefix}</artifactId>
<version>${paimon.version}</version>
</dependency>

<!-- Spark -->
<dependency>
<groupId>org.apache.spark</groupId>
@@ -27,9 +27,10 @@ public class TableFormat {
public static final String HUDI = "HUDI";
public static final String ICEBERG = "ICEBERG";
public static final String DELTA = "DELTA";
public static final String PAIMON = "PAIMON";
public static final String PARQUET = "PARQUET";

public static String[] values() {
return new String[] {"HUDI", "ICEBERG", "DELTA"};
return new String[] {"HUDI", "ICEBERG", "DELTA", "PAIMON"};
}
}
11 changes: 11 additions & 0 deletions xtable-core/pom.xml
@@ -110,6 +110,17 @@
<scope>test</scope>
</dependency>

<!-- Paimon dependencies -->
<dependency>
<groupId>org.apache.paimon</groupId>
<artifactId>paimon-bundle</artifactId>
</dependency>
<dependency>
<groupId>org.apache.paimon</groupId>
<artifactId>paimon-spark-${spark.version.prefix}</artifactId>
<scope>test</scope>
</dependency>

<!-- Hadoop dependencies -->
<dependency>
<groupId>org.apache.hadoop</groupId>
138 changes: 138 additions & 0 deletions xtable-core/src/main/java/org/apache/xtable/paimon/PaimonConversionSource.java
@@ -0,0 +1,138 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.xtable.paimon;

import java.io.IOException;
import java.time.Instant;
import java.util.List;

import lombok.extern.log4j.Log4j2;

import org.apache.paimon.Snapshot;
import org.apache.paimon.schema.SchemaManager;
import org.apache.paimon.schema.TableSchema;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.utils.SnapshotManager;

import org.apache.xtable.exception.ReadException;
import org.apache.xtable.model.CommitsBacklog;
import org.apache.xtable.model.InstantsForIncrementalSync;
import org.apache.xtable.model.InternalSnapshot;
import org.apache.xtable.model.InternalTable;
import org.apache.xtable.model.TableChange;
import org.apache.xtable.model.schema.InternalPartitionField;
import org.apache.xtable.model.schema.InternalSchema;
import org.apache.xtable.model.storage.DataLayoutStrategy;
import org.apache.xtable.model.storage.InternalDataFile;
import org.apache.xtable.model.storage.PartitionFileGroup;
import org.apache.xtable.model.storage.TableFormat;
import org.apache.xtable.spi.extractor.ConversionSource;

@Log4j2
public class PaimonConversionSource implements ConversionSource<Snapshot> {

private final FileStoreTable paimonTable;
private final SchemaManager schemaManager;
private final SnapshotManager snapshotManager;

private final PaimonDataFileExtractor dataFileExtractor = PaimonDataFileExtractor.getInstance();
private final PaimonSchemaExtractor schemaExtractor = PaimonSchemaExtractor.getInstance();
private final PaimonPartitionExtractor partitionSpecExtractor =
PaimonPartitionExtractor.getInstance();

public PaimonConversionSource(FileStoreTable paimonTable) {
this.paimonTable = paimonTable;
this.schemaManager = paimonTable.schemaManager();
this.snapshotManager = paimonTable.snapshotManager();
}

@Override
public InternalTable getTable(Snapshot snapshot) {
TableSchema paimonSchema = schemaManager.schema(snapshot.schemaId());
InternalSchema internalSchema = schemaExtractor.toInternalSchema(paimonSchema);

List<String> partitionKeys = paimonTable.partitionKeys();
List<InternalPartitionField> partitioningFields =
partitionSpecExtractor.toInternalPartitionFields(partitionKeys, internalSchema);

return InternalTable.builder()
.name(paimonTable.name())
.tableFormat(TableFormat.PAIMON)
.readSchema(internalSchema)
.layoutStrategy(DataLayoutStrategy.HIVE_STYLE_PARTITION)
.basePath(paimonTable.location().toString())
.partitioningFields(partitioningFields)
.latestCommitTime(Instant.ofEpochMilli(snapshot.timeMillis()))
.latestMetadataPath(snapshotManager.snapshotPath(snapshot.id()).toString())
.build();
}

@Override
public InternalTable getCurrentTable() {
Snapshot snapshot = getLastSnapshot();
return getTable(snapshot);
}

@Override
public InternalSnapshot getCurrentSnapshot() {
Snapshot snapshot = getLastSnapshot();
InternalTable internalTable = getTable(snapshot);
InternalSchema internalSchema = internalTable.getReadSchema();
List<InternalDataFile> dataFiles =
dataFileExtractor.toInternalDataFiles(paimonTable, snapshot, internalSchema);

return InternalSnapshot.builder()
.table(internalTable)
.version(Long.toString(snapshot.timeMillis()))
.partitionedDataFiles(PartitionFileGroup.fromFiles(dataFiles))
// TODO: Implement pending commits extraction, required for incremental sync
// https://github.com/apache/incubator-xtable/issues/754

Review comment (Contributor): Nitpick: Can you create a GH Issue to track the incremental sync work if one does not already exist?
.sourceIdentifier(getCommitIdentifier(snapshot))
.build();
}

private Snapshot getLastSnapshot() {
Snapshot snapshot = snapshotManager.latestSnapshot();
if (snapshot == null) {
throw new ReadException("No snapshots found for table " + paimonTable.name());
}
return snapshot;
}

@Override
public TableChange getTableChangeForCommit(Snapshot snapshot) {
throw new UnsupportedOperationException("Incremental Sync is not supported yet.");
}

@Override
public CommitsBacklog<Snapshot> getCommitsBacklog(
InstantsForIncrementalSync instantsForIncrementalSync) {
throw new UnsupportedOperationException("Incremental Sync is not supported yet.");
}

@Override
public boolean isIncrementalSyncSafeFrom(Instant instant) {
return false; // Incremental sync is not supported yet
}

@Override
public String getCommitIdentifier(Snapshot snapshot) {
return Long.toString(snapshot.commitIdentifier());
}

@Override
public void close() throws IOException {}
}
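For orientation, a minimal usage sketch of this source (not part of the PR: the warehouse path is hypothetical, and the table-loading calls mirror the ones the provider below uses):

package org.apache.xtable.paimon;

import org.apache.hadoop.conf.Configuration;

import org.apache.paimon.catalog.CatalogContext;
import org.apache.paimon.fs.FileIO;
import org.apache.paimon.fs.Path;
import org.apache.paimon.options.Options;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.table.FileStoreTableFactory;

import org.apache.xtable.model.InternalSnapshot;

public class PaimonConversionSourceExample {
  public static void main(String[] args) throws Exception {
    // Hypothetical filesystem table location.
    Path tablePath = new Path("/tmp/warehouse/default.db/orders");
    CatalogContext context = CatalogContext.create(new Options(), new Configuration());
    FileIO fileIO = FileIO.get(tablePath, context);
    FileStoreTable table = FileStoreTableFactory.create(fileIO, tablePath);

    // Read the latest snapshot through the conversion source.
    PaimonConversionSource source = new PaimonConversionSource(table);
    InternalSnapshot snapshot = source.getCurrentSnapshot();
    System.out.println(snapshot.getTable().getName() + " @ version " + snapshot.getVersion());
    source.close();
  }
}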
52 changes: 52 additions & 0 deletions xtable-core/src/main/java/org/apache/xtable/paimon/PaimonConversionSourceProvider.java
@@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.xtable.paimon;

import java.io.IOException;

import org.apache.paimon.Snapshot;
import org.apache.paimon.catalog.CatalogContext;
import org.apache.paimon.fs.FileIO;
import org.apache.paimon.fs.Path;
import org.apache.paimon.options.Options;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.table.FileStoreTableFactory;

import org.apache.xtable.conversion.ConversionSourceProvider;
import org.apache.xtable.conversion.SourceTable;
import org.apache.xtable.exception.ReadException;
import org.apache.xtable.spi.extractor.ConversionSource;

public class PaimonConversionSourceProvider extends ConversionSourceProvider<Snapshot> {
@Override
public ConversionSource<Snapshot> getConversionSourceInstance(SourceTable sourceTableConfig) {
try {
Options catalogOptions = new Options();
Review comment (Contributor): We don't need any changes right now but will the user want to supply some custom options here?
Reply (Contributor Author): Probably yes, but I'm not sure what they look like right now...

CatalogContext context = CatalogContext.create(catalogOptions, hadoopConf);

Path path = new Path(sourceTableConfig.getDataPath());
FileIO fileIO = FileIO.get(path, context);
FileStoreTable paimonTable = FileStoreTableFactory.create(fileIO, path);

return new PaimonConversionSource(paimonTable);
} catch (IOException e) {
throw new ReadException("Failed to read Paimon table from file system", e);
}
}
}
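On the options question above, one plausible shape for user-supplied catalog options (a sketch under the assumption that they arrive as a plain Map<String, String> from XTable configuration; the helper class and the example key are hypothetical) is Paimon's Options.fromMap:

package org.apache.xtable.paimon;

import java.util.Map;

import org.apache.hadoop.conf.Configuration;

import org.apache.paimon.catalog.CatalogContext;
import org.apache.paimon.options.Options;

public class PaimonCatalogOptionsSketch {
  // Builds a CatalogContext from a hypothetical user-provided option map,
  // e.g. {"s3.endpoint": "https://s3.example.com"} (illustrative key).
  static CatalogContext contextWithUserOptions(
      Map<String, String> userOptions, Configuration hadoopConf) {
    Options catalogOptions = Options.fromMap(userOptions);
    return CatalogContext.create(catalogOptions, hadoopConf);
  }
}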
99 changes: 99 additions & 0 deletions xtable-core/src/main/java/org/apache/xtable/paimon/PaimonDataFileExtractor.java
@@ -0,0 +1,99 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.xtable.paimon;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;

import org.apache.paimon.Snapshot;
import org.apache.paimon.io.DataFileMeta;
import org.apache.paimon.manifest.ManifestEntry;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.table.source.snapshot.SnapshotReader;

import org.apache.xtable.model.schema.InternalSchema;
import org.apache.xtable.model.stat.ColumnStat;
import org.apache.xtable.model.storage.InternalDataFile;

public class PaimonDataFileExtractor {

private final PaimonPartitionExtractor partitionExtractor =
PaimonPartitionExtractor.getInstance();

private static final PaimonDataFileExtractor INSTANCE = new PaimonDataFileExtractor();

public static PaimonDataFileExtractor getInstance() {
return INSTANCE;
}

public List<InternalDataFile> toInternalDataFiles(
FileStoreTable table, Snapshot snapshot, InternalSchema internalSchema) {
List<InternalDataFile> result = new ArrayList<>();
Iterator<ManifestEntry> manifestEntryIterator =
newSnapshotReader(table, snapshot).readFileIterator();
while (manifestEntryIterator.hasNext()) {
result.add(toInternalDataFile(table, manifestEntryIterator.next(), internalSchema));
}
return result;
}

private InternalDataFile toInternalDataFile(
FileStoreTable table, ManifestEntry entry, InternalSchema internalSchema) {
return InternalDataFile.builder()
.physicalPath(toFullPhysicalPath(table, entry))
.fileSizeBytes(entry.file().fileSize())
.lastModified(entry.file().creationTimeEpochMillis())
.recordCount(entry.file().rowCount())
.partitionValues(
partitionExtractor.toPartitionValues(table, entry.partition(), internalSchema))
.columnStats(toColumnStats(entry.file()))
.build();
}

private String toFullPhysicalPath(FileStoreTable table, ManifestEntry entry) {
String basePath = table.location().toString();
String bucketPath = "bucket-" + entry.bucket();
String filePath = entry.file().fileName();
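// Illustrative layout: <base>/dt=2024-01-01/bucket-0/<data-file> for a table
// partitioned by dt; unpartitioned tables collapse to <base>/bucket-0/<data-file>.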

Optional<String> partitionPath = partitionExtractor.toPartitionPath(table, entry.partition());
if (partitionPath.isPresent()) {
return String.join("/", basePath, partitionPath.get(), bucketPath, filePath);
} else {
return String.join("/", basePath, bucketPath, filePath);
}
}

private List<ColumnStat> toColumnStats(DataFileMeta file) {
// TODO: Implement logic to extract column stats from the file meta
// https://github.com/apache/incubator-xtable/issues/755
Review comment (Contributor): Let's track this as a separate GH issue if we are not already.
return Collections.emptyList();
}

private SnapshotReader newSnapshotReader(FileStoreTable table, Snapshot snapshot) {
// If the table has primary keys, we read only the top level files
// which means we can only consider fully compacted files.

Review comment (Contributor): Just curious, is this similar to the Hudi Merge on Read table?
if (!table.schema().primaryKeys().isEmpty()) {
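// Paimon primary-key tables are LSM trees: the highest level (numLevels() - 1)
// contains only fully compacted files, so reading it avoids unmerged duplicates.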
return table
.newSnapshotReader()
.withLevel(table.coreOptions().numLevels() - 1)
.withSnapshot(snapshot);
} else {
return table.newSnapshotReader().withSnapshot(snapshot);
}
}
}