Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 19 additions & 12 deletions csvwlib/converter/ModelConverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __init__(self, csv_url=None, metadata_url=None):
self.csvs = None
self.values_valiator = None
self.metadata_url = metadata_url
self.start_url = csv_url if csv_url is not None else metadata_url
self.start_url = csv_url if csv_url is not None else (metadata_url if not isinstance(metadata_url,dict) else metadata_url.get('url'))
self.metadata = None
self.atdm = {'@type': '@AnnotatedTableGroup'}
self.mode = CONST_STANDARD_MODE
Expand All @@ -34,6 +34,8 @@ def convert_to_atdm(self, mode=CONST_STANDARD_MODE):
metadata_validator = MetadataValidator(self.start_url)
self.mode = mode
self.metadata = MetadataLocator.find_and_get(self.csv_url, self.metadata_url)
if self.metadata_url and (isinstance(self.metadata_url,dict) or not self.metadata_url.startswith('http')):
self.metadata_url = "http://example.com/metadata"
self._normalize_metadata_base_url()
self._normalize_metadata_csv_url()
metadata_validator.validate_metadata(self.metadata)
Expand Down Expand Up @@ -75,14 +77,17 @@ def _add_table_metadata(table_metadata, table):
def _normalize_metadata_base_url(self):
if self.metadata is None:
return
for context_entry in self.metadata['@context']:
if type(context_entry) is dict and '@base' in context_entry:
original_url = self.metadata['url']
if original_url.startswith('http'):
directory, file_name = original_url.rsplit('/', 1)
self.metadata['url'] = directory + '/' + context_entry['@base'] + file_name
else:
self.metadata['url'] = context_entry['@base'] + self.metadata['url']
if isinstance(self.metadata,dict):
for context_entry in self.metadata.get('@context',[]):
if type(context_entry) is dict and '@base' in context_entry:
original_url = self.metadata["url"]
if original_url.startswith('http'):
directory, file_name = original_url.rsplit('/', 1)
self.metadata['url'] = directory + '/' + context_entry['@base'] + file_name
else:
self.metadata['url'] = context_entry['@base'] + self.metadata['url']
else:
print(f"{self.metadata} not dict")

def _normalize_metadata_csv_url(self):
""" Expands 'url' properties if necessary """
Expand All @@ -108,7 +113,7 @@ def _fetch_csvs(self):
CSVUtils.parse_csv_from_url_to_list(table['url'], self._delimiter(table)),
self.metadata['tables']))
else:
self.csvs = [CSVUtils.parse_csv_from_url_to_list(self.metadata['url'], self._delimiter(self.metadata))]
self.csvs = [CSVUtils.parse_csv_from_url_to_list(self.metadata.get('url'), self._delimiter(self.metadata))]

@staticmethod
def _delimiter(metadata):
Expand Down Expand Up @@ -201,7 +206,9 @@ def _normalize_column_names(self):
for i, column in enumerate(table['tableSchema']['columns'], start=1):
if 'name' not in column:
language = JSONLDUtils.language(self.metadata['@context'], table)
titles = column['titles'] if type(column['titles']) is list else [column['titles']]
titles = column.get('titles',[])
if type(titles) is not list:
titles = [titles]
if language is None:
column['name'] = DOPUtils.natural_language_first_value(titles)
else:
Expand Down Expand Up @@ -270,7 +277,7 @@ def _set_default_values(self):
for i, column_metadata in enumerate(table_metadata['tableSchema']['columns']):
if 'default' in column_metadata:
for row in csv:
if row[i] == '':
if i < len(row) and row[i] == '':
row[i] = column_metadata['default']

def _normalize_numbers_notation(self):
Expand Down
20 changes: 14 additions & 6 deletions csvwlib/converter/ToRDFConverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,23 @@ def parse_virtual_columns(self, row_node, atdm_row, table_metadata):
continue
subject = URIRef(UriTemplateUtils.insert_value(virtual_column['aboutUrl'], atdm_row, '',
table_metadata['url']))

predicate = Namespaces.get_term(virtual_column['propertyUrl'])
obj = UriTemplateUtils.insert_value(virtual_column['valueUrl'], atdm_row, '', table_metadata['url'])
obj = CommonProperties.expand_property_if_possible(obj)
self.graph.add((subject, predicate, URIRef(obj)))
if self.mode == CONST_STANDARD_MODE:
self.graph.add((row_node, CSVW.describes, subject))
if predicate:

if 'valueUrl' in virtual_column:
obj = UriTemplateUtils.insert_value(virtual_column['valueUrl'], atdm_row, '', table_metadata['url'])
obj = CommonProperties.expand_property_if_possible(obj)
self.graph.add((subject, predicate, URIRef(obj)))
elif 'default' in virtual_column:
self.graph.add((subject, predicate, self._object_node(virtual_column['default'], virtual_column, atdm_row, '')))
if self.mode == CONST_STANDARD_MODE:
self.graph.add((row_node, CSVW.describes, subject))
else:
print(f"term {virtual_column['propertyUrl']} not in namespaces")

def _add_file_metadata(self, metadata, node):
language = JSONLDUtils.language(self.metadata['@context'])
language = JSONLDUtils.language(self.metadata.get('@context',[]))
for key, value in metadata.items():
if CommonProperties.is_common_property(key) or key == 'notes':
triples = CommonProperties.property_to_triples((key, metadata[key]), node, language)
Expand Down
2 changes: 1 addition & 1 deletion csvwlib/utils/DOPUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ def natural_language_first_value(property_value):
if type(property_value) is str:
return property_value
elif type(property_value) is list:
return property_value[0]
return next(iter(property_value or []), None)
19 changes: 16 additions & 3 deletions csvwlib/utils/MetadataLocator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json as jsonlib

import requests
import requests, os


from csvwlib.utils.metadata import MetadataValidator
from csvwlib.utils.url.WellKnownUriResolver import WellKnownUriResolver
Expand All @@ -11,7 +12,20 @@ class MetadataLocator:
@staticmethod
def find_and_get(csv_url, metadata_url=None):
if metadata_url is not None:
return jsonlib.loads(requests.get(metadata_url).content.decode())
if isinstance(metadata_url, dict): # md already parsed
return jsonlib.loads(jsonlib.dumps(metadata_url))
try:
md = jsonlib.loads(metadata_url) # expect json?
if not isinstance(md,dict):
raise Exception('metadata not dict')
return md
except ValueError as e:
if metadata_url.startswith('http'): # if url
return jsonlib.loads(requests.get(metadata_url).content.decode())
elif os.path.exists(metadata_url): # expect local file?
with open(metadata_url,"r") as f:
return jsonlib.loads(f.read())
return None

response = requests.head(csv_url)
if 'Link' in response.headers and 'describedby' in response.links:
Expand All @@ -24,7 +38,6 @@ def find_and_get(csv_url, metadata_url=None):
metadata = MetadataLocator._retrieve_from_site_wide_conf(csv_url)
if metadata is not None:
return metadata

if '?' in csv_url:
csv_url, query = csv_url.split('?')
metadata_url = csv_url + '-metadata.json'
Expand Down
12 changes: 10 additions & 2 deletions csvwlib/utils/json/CommonProperties.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,17 @@ def expand_property_if_possible(prop):
return prop

prefix, prop = prop.split(':')
return Namespaces.get(prefix).term(prop)
return CommonProperties.ns_has_term(prefix, prop)

@staticmethod
def expand_property(prop):
    """Expand a ``prefix:name`` property into the term of its namespace.

    Args:
        prop: compact property name; the text before the first ':' is the
            namespace prefix, the remainder is the local name.

    Returns:
        Whatever ``CommonProperties.ns_has_term`` returns for that prefix
        and local name.

    Raises:
        ValueError: if ``prop`` contains no ':' separator.
    """
    # Split on the first ':' only, so a local name that itself contains a
    # colon does not break the two-way unpacking.
    prefix, local_name = prop.split(':', 1)
    return CommonProperties.ns_has_term(prefix, local_name)

@staticmethod
def ns_has_term(prefix, prop):
    """Return the term ``prop`` from the namespace registered as ``prefix``.

    Args:
        prefix: key passed to ``Namespaces.get``.
        prop: local name handed to the namespace's ``term`` method.

    Raises:
        Exception: if ``Namespaces.get(prefix)`` yields a falsy value.
    """
    namespace = Namespaces.get(prefix)
    if not namespace:
        raise Exception(f'Namespace {prefix} for {prop} not registered')
    return namespace.term(prop)
14 changes: 10 additions & 4 deletions csvwlib/utils/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,22 +75,26 @@ class MetadataValidator:
def __init__(self, start_url):
    """Create a validator and publish it as ``MetadataValidator.instance``.

    Args:
        start_url: starting location for validation. When a dict with a
            'url' key is passed, that key's value is stored instead; any
            other value (including a dict without 'url') is stored as given.
    """
    MetadataValidator.instance = self
    self.metadata = {}
    if isinstance(start_url, dict) and 'url' in start_url:
        self.start_url = start_url['url']
    else:
        # Always assign, so later reads of self.start_url cannot fail with
        # AttributeError regardless of the argument's type.
        self.start_url = start_url
    self.warnings = []
    self.table = {}

def validate_metadata(self, metadata):
if metadata is None:
if metadata is None or not isinstance(metadata, dict):
return
self.metadata = metadata
if 'tableSchema' in metadata:
if 'tableSchema' in metadata.keys():
tables = [metadata]
else:
tables = metadata['tables']

for table in tables:
self.table = table
if 'tables' in metadata:
if 'tables' in metadata.keys():
self.check_member_property('tableGroup', metadata)
else:
self.check_member_property('table', metadata)
Expand All @@ -101,6 +105,8 @@ def validate_metadata(self, metadata):
self.check_titles(table)

def check_csv_reference(self, table, metadata):
if isinstance(self.start_url,dict):
return
if not self.start_url.endswith('.csv'):
return
if table['url'] != self.start_url:
Expand Down
3 changes: 2 additions & 1 deletion csvwlib/utils/rdf/OntologyUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,5 @@ def type(column_metadata):
return None

datatype = OntologyUtils._name_mappings.get(datatype, datatype)
return OntologyUtils._type_mappings.get(datatype, XSD.term(datatype))
xsd_type = getattr(XSD, datatype)
return OntologyUtils._type_mappings.get(datatype, xsd_type)
1 change: 0 additions & 1 deletion csvwlib/utils/url/UriTemplateUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ def expand_template(template: str, row: dict) -> str:
"""
def replacer(match):
var = match.group(1)
print('r',row)
if var in row.keys():
return str(row[var])
raise KeyError(f"Missing value for template variable '{var}'")
Expand Down