1+ using Azure . Storage . Blobs ;
2+ using Azure . Storage . Blobs . Models ;
3+ using FlowSynx . PluginCore ;
4+ using System . Text ;
5+
6+ namespace FlowSynx . Plugins . Azure . Blobs . Extensions ;
7+
/// <summary>
/// Extension helpers that convert Azure Blob Storage objects into <see cref="PluginContext"/> instances.
/// </summary>
public static class ConverterExtensions
{
    /// <summary>
    /// Fraction of sampled bytes that must be control characters before the data is treated as binary (10%).
    /// </summary>
    private const double BinaryThreshold = 0.1;

    /// <summary>
    /// Downloads the blob and wraps its payload in a <see cref="PluginContext"/> of type "File".
    /// </summary>
    /// <param name="blobClient">Client pointing at the blob to download.</param>
    /// <param name="includeMetadata">
    /// When <c>true</c>, the blob properties are fetched with an additional request and copied into the
    /// context metadata; <c>false</c> and <c>null</c> both skip that step.
    /// </param>
    /// <param name="cancellationToken">Token forwarded to the download, copy and properties calls.</param>
    /// <returns>
    /// A context whose <c>RawData</c> holds the bytes when the payload is classified as binary, or whose
    /// <c>Content</c> holds the UTF-8 decoded text otherwise. The other member is left <c>null</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="blobClient"/> is <c>null</c>.</exception>
    public static async Task<PluginContext> ToContext(this BlobClient blobClient, bool? includeMetadata,
        CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(blobClient);

        byte[] dataBytes;

        // Both the download result and the buffer are disposable; release them as soon as the
        // payload has been copied into a plain array.
        using (BlobDownloadInfo download = await blobClient.DownloadAsync(cancellationToken).ConfigureAwait(false))
        using (var buffer = new MemoryStream())
        {
            await download.Content.CopyToAsync(buffer, cancellationToken).ConfigureAwait(false);

            // ToArray copies the whole buffer regardless of the current position, so no rewind is needed.
            dataBytes = buffer.ToArray();
        }

        bool isBinaryFile = IsBinaryFile(dataBytes);

        var context = new PluginContext(blobClient.Name, "File")
        {
            RawData = isBinaryFile ? dataBytes : null,
            Content = isBinaryFile ? null : Encoding.UTF8.GetString(dataBytes),
        };

        if (includeMetadata is true)
        {
            var blobProperties = await blobClient
                .GetPropertiesAsync(cancellationToken: cancellationToken)
                .ConfigureAwait(false);
            AddProperties(context, blobProperties);
        }

        return context;
    }

    /// <summary>
    /// Heuristically decides whether a byte buffer is binary by inspecting a leading sample.
    /// </summary>
    /// <param name="data">Buffer to inspect; <c>null</c> or empty buffers are reported as not binary.</param>
    /// <param name="sampleSize">
    /// Maximum number of leading bytes to inspect. A non-positive value inspects nothing and yields <c>false</c>.
    /// </param>
    /// <returns>
    /// <c>true</c> when more than <see cref="BinaryThreshold"/> of the sampled bytes are control
    /// characters in the ranges 0-7 or 14-31; otherwise <c>false</c>.
    /// </returns>
    private static bool IsBinaryFile(byte[] data, int sampleSize = 1024)
    {
        if (data is null || data.Length == 0)
        {
            return false;
        }

        int checkLength = Math.Min(sampleSize, data.Length);
        if (checkLength <= 0)
        {
            // Nothing to sample; also avoids a 0/0 division below.
            return false;
        }

        int nonPrintableCount = 0;
        for (int i = 0; i < checkLength; i++)
        {
            byte b = data[i];

            // Bytes 8-13 (backspace, tab, LF, VT, FF, CR) are deliberately not counted.
            if (b < 8 || (b > 13 && b < 32))
            {
                nonPrintableCount++;
            }
        }

        return (double)nonPrintableCount / checkLength > BinaryThreshold;
    }

    /// <summary>
    /// Copies a fixed set of blob properties into the metadata collection of <paramref name="context"/>.
    /// </summary>
    /// <param name="context">Context whose <c>Metadata</c> receives the entries.</param>
    /// <param name="properties">Blob properties returned by the storage service.</param>
    private static void AddProperties(PluginContext context, BlobProperties properties)
    {
        context.Metadata.Add("AccessTier", properties.AccessTier);
        context.Metadata.Add("AccessTierChangedOn", properties.AccessTierChangedOn);
        context.Metadata.Add("AccessTierInferred", properties.AccessTierInferred);
        context.Metadata.Add("BlobSequenceNumber", properties.BlobSequenceNumber);
        context.Metadata.Add("BlobType", properties.BlobType);
        context.Metadata.Add("CacheControl", properties.CacheControl);
        context.Metadata.Add("ContentDisposition", properties.ContentDisposition);
        context.Metadata.Add("ContentEncoding", properties.ContentEncoding);

        // ContentHash may be null when the service holds no MD5 for the blob; guard the hex
        // conversion and fall back to an empty string in that case.
        context.Metadata.Add("ContentHash", properties.ContentHash?.ToHexString() ?? string.Empty);
        context.Metadata.Add("ContentLanguage", properties.ContentLanguage);
        context.Metadata.Add("ContentLength", properties.ContentLength);
        context.Metadata.Add("ContentType", properties.ContentType);
        context.Metadata.Add("CopyCompletedOn", properties.CopyCompletedOn);
        context.Metadata.Add("CopyId", properties.CopyId);
        context.Metadata.Add("CopyProgress", properties.CopyProgress);
        context.Metadata.Add("CopySource", properties.CopySource);
        context.Metadata.Add("CopyStatus", properties.CopyStatus);
        context.Metadata.Add("CopyStatusDescription", properties.CopyStatusDescription);
        context.Metadata.Add("CreatedOn", properties.CreatedOn);
        context.Metadata.Add("DestinationSnapshot", properties.DestinationSnapshot);
        context.Metadata.Add("ETag", properties.ETag);
        context.Metadata.Add("LastModified", properties.LastModified);
        context.Metadata.Add("LeaseDuration", properties.LeaseDuration);
        context.Metadata.Add("LeaseState", properties.LeaseState);
        context.Metadata.Add("LeaseStatus", properties.LeaseStatus);
    }
}
0 commit comments