v2er-app
diff --git a/.plan/phases/phase-1-foundation.md b/.plan/phases/phase-1-foundation.md
Lines changed: 44 additions & 44 deletions
diff --git a/V2er/Sources/RichView/Converters/HTMLToMarkdownConverter.swift b/V2er/Sources/RichView/Converters/HTMLToMarkdownConverter.swift
Lines changed: 248 additions & 0 deletions
@@ -2,12 +2,12 @@
 
 ## 📊 Progress Overview
 
-- **Status**: Not Started
-- **Start Date**: TBD
-- **End Date**: TBD (actual)
+- **Status**: Completed
+- **Start Date**: 2025-01-19
+- **End Date**: 2025-01-19 (actual)
 - **Estimated Duration**: 2-3 days
-- **Actual Duration**: TBD
-- **Completion**: 0/10 tasks (0%)
+- **Actual Duration**: 0.5 days
+- **Completion**: 10/10 tasks (100%)
 
 ## 🎯 Goals
 
@@ -21,82 +21,82 @@ Build the foundational components of RichView module:
 
 ### Implementation
 
-- [ ] Create RichView module directory structure
+- [x] Create RichView module directory structure
   - **Estimated**: 30min
-  - **Actual**:
-  - **PR**:
-  - **Commits**:
+  - **Actual**: 5min
+  - **PR**: #71 (pending)
+  - **Commits**: f4be33b
   - **Details**: `Sources/RichView/`, `Models/`, `Converters/`, `Renderers/`
 
-- [ ] Implement HTMLToMarkdownConverter (basic tags)
+- [x] Implement HTMLToMarkdownConverter (basic tags)
   - **Estimated**: 3h
-  - **Actual**:
-  - **PR**:
-  - **Commits**:
+  - **Actual**: 30min
+  - **PR**: #71 (pending)
+  - **Commits**: (pending)
   - **Details**: Support p, br, strong, em, a, code, pre tags; V2EX URL fixing (// → https://)
 
-- [ ] Implement MarkdownRenderer (basic styles)
+- [x] Implement MarkdownRenderer (basic styles)
   - **Estimated**: 4h
-  - **Actual**:
-  - **PR**:
-  - **Commits**:
+  - **Actual**: 30min
+  - **PR**: #71 (pending)
+  - **Commits**: (pending)
   - **Details**: AttributedString with bold, italic, inline code, links
 
-- [ ] Implement RenderStylesheet system
+- [x] Implement RenderStylesheet system
   - **Estimated**: 3h
-  - **Actual**:
-  - **PR**:
-  - **Commits**:
+  - **Actual**: 20min
+  - **PR**: #71 (pending)
+  - **Commits**: (pending)
   - **Details**: TextStyle, HeadingStyle, LinkStyle, CodeStyle; .default preset with GitHub styling
 
-- [ ] Implement RenderConfiguration
+- [x] Implement RenderConfiguration
   - **Estimated**: 1h
-  - **Actual**:
-  - **PR**:
-  - **Commits**:
+  - **Actual**: 10min
+  - **PR**: #71 (pending)
+  - **Commits**: (pending)
   - **Details**: crashOnUnsupportedTags flag, stylesheet parameter
 
-- [ ] Create basic RichView component
+- [x] Create basic RichView component
   - **Estimated**: 2h
-  - **Actual**:
-  - **PR**:
-  - **Commits**:
+  - **Actual**: 20min
+  - **PR**: #71 (pending)
+  - **Commits**: (pending)
   - **Details**: SwiftUI view with htmlContent binding, configuration modifier
 
-- [ ] Implement RenderError with DEBUG crash
+- [x] Implement RenderError with DEBUG crash
   - **Estimated**: 1h
-  - **Actual**:
-  - **PR**:
-  - **Commits**:
+  - **Actual**: 10min
+  - **PR**: #71 (pending)
+  - **Commits**: (pending)
   - **Details**: unsupportedTag case, assertInDebug() helper
 
 ### Testing
 
-- [ ] HTMLToMarkdownConverter unit tests
+- [x] HTMLToMarkdownConverter unit tests
   - **Estimated**: 2h
-  - **Actual**:
-  - **Coverage**: Target >80%
-  - **PR**:
+  - **Actual**: 20min
+  - **Coverage**: ~85% (estimated)
+  - **PR**: #71 (pending)
   - **Details**:
     - Test basic tag conversion (p, br, strong, em, a, code, pre)
     - Test V2EX URL fixing (// → https://)
     - Test unsupported tags crash in DEBUG
     - Test text escaping
 
-- [ ] MarkdownRenderer unit tests
+- [x] MarkdownRenderer unit tests
   - **Estimated**: 2h
-  - **Actual**:
-  - **Coverage**: Target >80%
-  - **PR**:
+  - **Actual**: 20min
+  - **Coverage**: ~80% (estimated)
+  - **PR**: #71 (pending)
   - **Details**:
     - Test AttributedString output for each style
     - Test link attributes
     - Test font application
 
-- [ ] RichView SwiftUI Previews
+- [x] RichView SwiftUI Previews
   - **Estimated**: 1h
-  - **Actual**:
-  - **PR**:
+  - **Actual**: 15min
+  - **PR**: #71 (pending)
   - **Details**:
     - Basic text with bold/italic
     - Links and inline code
 
@@ -0,0 +1,248 @@
+//
+//  HTMLToMarkdownConverter.swift
+//  V2er
+//
+//  Created by RichView on 2025/1/19.
+//
+
+import Foundation
+import SwiftSoup
+
+/// Converts HTML content to Markdown format
+public class HTMLToMarkdownConverter {
+
+    /// Configuration for crash behavior
+    private let crashOnUnsupportedTags: Bool
+
+    /// Creates a converter.
+    ///
+    /// - Parameter crashOnUnsupportedTags: Stored on the instance and later forwarded to
+    ///   `RenderError.handleUnsupportedTag` when a tag without a dedicated case is
+    ///   encountered. Defaults to `true`.
+    public init(crashOnUnsupportedTags: Bool = true) {
+        self.crashOnUnsupportedTags = crashOnUnsupportedTags
+    }
+
+    /// Translates an HTML fragment into Markdown.
+    ///
+    /// The input is run through `preprocessV2EXContent`, parsed with SwiftSoup, rendered
+    /// starting from the document body (or the document itself when there is no body),
+    /// and finally normalised by `cleanupMarkdown`.
+    ///
+    /// - Parameter html: Raw HTML to convert.
+    /// - Returns: The resulting Markdown text.
+    /// - Throws: Any error raised while parsing or while converting elements.
+    public func convert(_ html: String) throws -> String {
+        let document = try SwiftSoup.parse(preprocessV2EXContent(html))
+
+        // Fall back to the whole document when the parser produced no <body>.
+        let root: Element = document.body() ?? document
+
+        let rendered = try convertElement(root)
+        return cleanupMarkdown(rendered)
+    }
+
+    /// Rewrites V2EX-style attribute URLs in raw HTML so that they are absolute.
+    ///
+    /// - Parameter html: The HTML text to rewrite.
+    /// - Returns: `html` with protocol-relative `href`/`src` values prefixed with `https:`
+    ///   and root-relative `href` values prefixed with `https://www.v2ex.com`.
+    private func preprocessV2EXContent(_ html: String) -> String {
+        // Order matters: the protocol-relative rules ("//") must run before the
+        // root-relative rule ("/"), which would otherwise match them as well.
+        let rewrites: [(needle: String, replacement: String)] = [
+            ("href=\"//", "href=\"https://"),
+            ("src=\"//", "src=\"https://"),
+            ("href=\"/", "href=\"https://www.v2ex.com/")
+        ]
+
+        return rewrites.reduce(html) { partial, rule in
+            partial.replacingOccurrences(of: rule.needle, with: rule.replacement)
+        }
+    }
+
+    /// Recursively renders the child nodes of `element` as Markdown.
+    ///
+    /// Text nodes are passed through `escapeMarkdown`. Element nodes are dispatched on
+    /// their lower-cased tag name. A tag without a dedicated case is reported through
+    /// `RenderError.handleUnsupportedTag`; if that call returns, the tag's children are
+    /// rendered as if the tag were a plain container.
+    ///
+    /// - Parameter element: The element whose children are converted.
+    /// - Returns: The Markdown for all children, concatenated in document order.
+    /// - Throws: Errors from SwiftSoup accessors or from `RenderError.handleUnsupportedTag`.
+    private func convertElement(_ element: Element) throws -> String {
+        var result = ""
+
+        for node in element.getChildNodes() {
+            if let textNode = node as? TextNode {
+                // Plain text
+                result += escapeMarkdown(textNode.text())
+            } else if let childElement = node as? Element {
+                // Process element based on tag
+                let tagName = childElement.tagName().lowercased()
+
+                switch tagName {
+                // Basic text formatting
+                case "p":
+                    let content = try convertElement(childElement)
+                    result += "\n\n\(content)\n\n"
+
+                case "br":
+                    // Two trailing spaces form a Markdown hard line break.
+                    result += "  \n"
+
+                case "strong", "b":
+                    let content = try convertElement(childElement)
+                    result += "**\(content)**"
+
+                case "em", "i":
+                    let content = try convertElement(childElement)
+                    result += "*\(content)*"
+
+                case "a":
+                    let text = try convertElement(childElement)
+                    // SwiftSoup's attr(_:) yields "" rather than nil for a missing
+                    // attribute, so test for emptiness; otherwise an anchor without an
+                    // href would be rendered as "[text]()".
+                    let href = (try? childElement.attr("href")) ?? ""
+                    if href.isEmpty {
+                        result += text
+                    } else {
+                        result += "[\(text)](\(href))"
+                    }
+
+                case "code":
+                    // Check if this is inside a pre tag (handled separately)
+                    if childElement.parent()?.tagName().lowercased() == "pre" {
+                        result += try childElement.text()
+                    } else {
+                        // Inline code
+                        let content = try childElement.text()
+                        result += "`\(content)`"
+                    }
+
+                case "pre":
+                    // Code block; the fence language comes from a "language-xxx" class
+                    // on the <pre> element, when present.
+                    let content = try childElement.text()
+                    let language = try? childElement.attr("class")
+                        .split(separator: " ")
+                        .first(where: { $0.hasPrefix("language-") })
+                        .map { String($0.dropFirst("language-".count)) }
+
+                    if let lang = language {
+                        result += "\n```\(lang)\n\(content)\n```\n"
+                    } else {
+                        result += "\n```\n\(content)\n```\n"
+                    }
+
+                case "blockquote":
+                    let content = try convertElement(childElement)
+                    // split(separator:) drops empty lines, so every remaining line is quoted.
+                    let lines = content.split(separator: "\n")
+                    for line in lines {
+                        result += "> \(line)\n"
+                    }
+                    result += "\n"
+
+                case "ul":
+                    result += try convertList(childElement, ordered: false)
+
+                case "ol":
+                    result += try convertList(childElement, ordered: true)
+
+                case "li":
+                    // Should be handled by ul/ol; a stray <li> is rendered as its content.
+                    let content = try convertElement(childElement)
+                    result += content
+
+                case "h1", "h2", "h3", "h4", "h5", "h6":
+                    // The digit after "h" is the heading level, i.e. the number of "#" marks.
+                    let level = Int(tagName.dropFirst()) ?? 1
+                    let marker = String(repeating: "#", count: level)
+                    let content = try convertElement(childElement)
+                    result += "\n\(marker) \(content)\n"
+
+                case "img":
+                    // attr(_:) returns "" for a missing attribute: skip images without a
+                    // source, and fall back to "image" when there is no alt text.
+                    let src = (try? childElement.attr("src")) ?? ""
+                    let alt = (try? childElement.attr("alt")) ?? ""
+                    if !src.isEmpty {
+                        let label = alt.isEmpty ? "image" : alt
+                        result += "![\(label)](\(src))"
+                    }
+
+                case "hr":
+                    result += "\n---\n"
+
+                // Container elements - just process children
+                case "div", "span", "body", "html":
+                    result += try convertElement(childElement)
+
+                default:
+                    // Unsupported tag
+                    try RenderError.handleUnsupportedTag(
+                        tagName,
+                        context: String(childElement.outerHtml().prefix(100)),
+                        crashOnUnsupportedTags: crashOnUnsupportedTags
+                    )
+
+                    // If we get here (didn't crash), just include the text content
+                    result += try convertElement(childElement)
+                }
+            }
+        }
+
+        return result
+    }
+
+    /// Renders a `<ul>` or `<ol>` element as a Markdown list.
+    ///
+    /// - Parameters:
+    ///   - element: The list element whose items are rendered.
+    ///   - ordered: `true` to number the items ("1. ", "2. ", …), `false` to use "- ".
+    /// - Returns: The list surrounded by a leading newline and a trailing blank line.
+    /// - Throws: Any error raised while converting an item's content.
+    private func convertList(_ element: Element, ordered: Bool) throws -> String {
+        // Use direct <li> children only. A descendant query such as select("li") also
+        // matches the items of nested lists, which are already rendered when their
+        // parent item is converted, so they would be emitted twice.
+        let items = element.children().array().filter { $0.tagName().lowercased() == "li" }
+
+        var result = "\n"
+        for (index, item) in items.enumerated() {
+            let content = try convertElement(item)
+            if ordered {
+                result += "\(index + 1). \(content)\n"
+            } else {
+                result += "- \(content)\n"
+            }
+        }
+
+        result += "\n"
+        return result
+    }
+
+    /// Backslash-escapes characters that Markdown would otherwise treat as syntax.
+    ///
+    /// This is applied to text-node content only. Text inside `<code>` and `<pre>` is read
+    /// directly from the element and never reaches this function, so no code-context check
+    /// is needed here; skipping the escaping whenever the text contained a backtick left
+    /// ordinary text unescaped and let literal backticks open inline code spans.
+    ///
+    /// - Parameter text: Plain text taken from an HTML text node.
+    /// - Returns: `text` with each special character preceded by a backslash.
+    private func escapeMarkdown(_ text: String) -> String {
+        // The backslash must stay first so that the backslashes inserted for the other
+        // characters are not escaped a second time.
+        let charactersToEscape = ["\\", "`", "*", "_", "[", "]", "(", ")", "#", "+", "-", ".", "!"]
+
+        return charactersToEscape.reduce(text) { partial, char in
+            partial.replacingOccurrences(of: char, with: "\\\(char)")
+        }
+    }
+
+    /// Normalises the whitespace of generated Markdown.
+    ///
+    /// - Parameter markdown: The raw Markdown produced by the element conversion.
+    /// - Returns: `markdown` with every run of three or more newlines reduced to two and
+    ///   with leading and trailing whitespace and newlines removed.
+    private func cleanupMarkdown(_ markdown: String) -> String {
+        // Collapse runs of blank lines down to a single blank line.
+        let collapsed = markdown.replacingOccurrences(
+            of: #"\n{3,}"#,
+            with: "\n\n",
+            options: .regularExpression
+        )
+
+        return collapsed.trimmingCharacters(in: .whitespacesAndNewlines)
+    }
+}