fixes#47 Implement ability to query fields used on a worksheet (#54) · vegi1979/document-api-python@3757ada · GitHub
[go: up one dir, main page]

Skip to content

Commit 3757ada

Browse files
author
Russell Hay
authored
fixes#47 Implement ability to query fields used on a worksheet (tableau#54)
* first stab at an API, not correct in retrospect * Initial revision of api based on API discussions * adding additional testing and enabling lists to be passed in * removing left over stuff from previous attempt * Some fields are not listed in <column> tags, so we need to construct from <metadata-record> * removing thumbnail * move _column_object_from* to use a named tuple * removing cruft from previous implementation * renaming retval to something more useful * cleaning up _is_used_by_worksheet * reformatting import statements * removing PredicateDictionary * A workbook doc without any content should still load * found_in -> used_by_sheet
1 parent f46f3d9 commit 3757ada

File tree

8 files changed

+428
-35
lines changed

8 files changed

+428
-35
lines changed

tableaudocumentapi/datasource.py

Lines changed: 60 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,54 @@
1212
from tableaudocumentapi import Field
1313
from tableaudocumentapi.multilookup_dict import MultiLookupDict
1414

15+
########
16+
# This is needed in order to determine if something is a string or not. It is necessary because
17+
# of differences between python2 (basestring) and python3 (str). If python2 support is ever
18+
# dropped, remove this and change the basestring references below to str
19+
try:
20+
basestring
21+
except NameError:
22+
basestring = str
23+
########
1524

16-
def _mapping_from_xml(root_xml, column_xml):
17-
retval = Field.from_xml(column_xml)
18-
local_name = retval.id
19-
if "'" in local_name:
20-
local_name = sax.escape(local_name, {"'": "&apos;"})
21-
xpath = ".//metadata-record[@class='column'][local-name='{}']".format(local_name)
22-
metadata_record = root_xml.find(xpath)
25+
_ColumnObjectReturnTuple = collections.namedtuple('_ColumnObjectReturnTupleType', ['id', 'object'])
26+
27+
28+
def _get_metadata_xml_for_field(root_xml, field_name):
29+
if "'" in field_name:
30+
field_name = sax.escape(field_name, {"'": "&apos;"})
31+
xpath = ".//metadata-record[@class='column'][local-name='{}']".format(field_name)
32+
return root_xml.find(xpath)
33+
34+
35+
def _is_used_by_worksheet(names, field):
36+
return any((y for y in names if y in field.worksheets))
37+
38+
39+
class FieldDictionary(MultiLookupDict):
40+
def used_by_sheet(self, name):
41+
# If we pass in a string, no need to get complicated, just check to see if name is in
42+
# the field's list of worksheets
43+
if isinstance(name, basestring):
44+
return [x for x in self.values() if name in x.worksheets]
45+
46+
# if we pass in a list, we need to check to see if any of the names in the list are in
47+
# the field's list of worksheets
48+
return [x for x in self.values() if _is_used_by_worksheet(name, x)]
49+
50+
51+
def _column_object_from_column_xml(root_xml, column_xml):
52+
field_object = Field.from_column_xml(column_xml)
53+
local_name = field_object.id
54+
metadata_record = _get_metadata_xml_for_field(root_xml, local_name)
2355
if metadata_record is not None:
24-
retval.apply_metadata(metadata_record)
25-
return retval.id, retval
56+
field_object.apply_metadata(metadata_record)
57+
return _ColumnObjectReturnTuple(field_object.id, field_object)
58+
59+
60+
def _column_object_from_metadata_xml(metadata_xml):
61+
field_object = Field.from_metadata_xml(metadata_xml)
62+
return _ColumnObjectReturnTuple(field_object.id, field_object)
2663

2764

2865
class ConnectionParser(object):
@@ -73,7 +110,7 @@ def __init__(self, dsxml, filename=None):
73110

74111
@classmethod
75112
def from_file(cls, filename):
76-
"Initialize datasource from file (.tds)"
113+
"""Initialize datasource from file (.tds)"""
77114

78115
if zipfile.is_zipfile(filename):
79116
dsxml = xfile.get_xml_from_archive(filename).getroot()
@@ -141,6 +178,16 @@ def fields(self):
141178
return self._fields
142179

143180
def _get_all_fields(self):
144-
column_objects = (_mapping_from_xml(self._datasourceTree, xml)
145-
for xml in self._datasourceTree.findall('.//column'))
146-
return MultiLookupDict({k: v for k, v in column_objects})
181+
column_objects = [_column_object_from_column_xml(self._datasourceTree, xml)
182+
for xml in self._datasourceTree.findall('.//column')]
183+
existing_fields = [x.id for x in column_objects]
184+
metadata_fields = (x.text
185+
for x in self._datasourceTree.findall(".//metadata-record[@class='column']/local-name"))
186+
187+
missing_fields = (x for x in metadata_fields if x not in existing_fields)
188+
column_objects.extend((
189+
_column_object_from_metadata_xml(_get_metadata_xml_for_field(self._datasourceTree, field_name))
190+
for field_name in missing_fields
191+
))
192+
193+
return FieldDictionary({k: v for k, v in column_objects})

tableaudocumentapi/field.py

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,12 @@
1414
'aggregation', # The type of aggregation on the field (e.g Sum, Avg)
1515
]
1616

17+
_METADATA_TO_FIELD_MAP = [
18+
('local-name', 'id'),
19+
('local-type', 'datatype'),
20+
('remote-alias', 'alias')
21+
]
22+
1723

1824
def _find_metadata_record(record, attrib):
1925
element = record.find('.//{}'.format(attrib))
@@ -25,25 +31,60 @@ def _find_metadata_record(record, attrib):
2531
class Field(object):
2632
""" Represents a field in a datasource """
2733

28-
def __init__(self, xmldata):
29-
for attrib in _ATTRIBUTES:
30-
self._apply_attribute(xmldata, attrib, lambda x: xmldata.attrib.get(x, None))
34+
def __init__(self, column_xml=None, metadata_xml=None):
3135

32-
# All metadata attributes begin at None
36+
# Initialize all the possible attributes
37+
for attrib in _ATTRIBUTES:
38+
setattr(self, '_{}'.format(attrib), None)
3339
for attrib in _METADATA_ATTRIBUTES:
3440
setattr(self, '_{}'.format(attrib), None)
41+
self._worksheets = set()
42+
43+
if column_xml is not None:
44+
self._initialize_from_column_xml(column_xml)
45+
if metadata_xml is not None:
46+
self.apply_metadata(metadata_xml)
47+
48+
elif metadata_xml is not None:
49+
self._initialize_from_metadata_xml(metadata_xml)
50+
51+
else:
52+
raise AttributeError('column_xml or metadata_xml needed to initialize field')
53+
54+
def _initialize_from_column_xml(self, xmldata):
55+
for attrib in _ATTRIBUTES:
56+
self._apply_attribute(xmldata, attrib, lambda x: xmldata.attrib.get(x, None))
57+
58+
def _initialize_from_metadata_xml(self, xmldata):
59+
for metadata_name, field_name in _METADATA_TO_FIELD_MAP:
60+
self._apply_attribute(xmldata, field_name, lambda x: xmldata.find('.//{}'.format(metadata_name)).text,
61+
read_name=metadata_name)
62+
self.apply_metadata(xmldata)
3563

64+
########################################
65+
# Special Case methods for constructing fields from various sources
66+
# not intended for client use
67+
########################################
3668
def apply_metadata(self, metadata_record):
3769
for attrib in _METADATA_ATTRIBUTES:
3870
self._apply_attribute(metadata_record, attrib, functools.partial(_find_metadata_record, metadata_record))
3971

72+
def add_used_in(self, name):
73+
self._worksheets.add(name)
74+
4075
@classmethod
41-
def from_xml(cls, xmldata):
42-
return cls(xmldata)
76+
def from_column_xml(cls, xmldata):
77+
return cls(column_xml=xmldata)
4378

44-
def _apply_attribute(self, xmldata, attrib, default_func):
45-
if hasattr(self, '_read_{}'.format(attrib)):
46-
value = getattr(self, '_read_{}'.format(attrib))(xmldata)
79+
@classmethod
80+
def from_metadata_xml(cls, xmldata):
81+
return cls(metadata_xml=xmldata)
82+
83+
def _apply_attribute(self, xmldata, attrib, default_func, read_name=None):
84+
if read_name is None:
85+
read_name = attrib
86+
if hasattr(self, '_read_{}'.format(read_name)):
87+
value = getattr(self, '_read_{}'.format(read_name))(xmldata)
4788
else:
4889
value = default_func(attrib)
4990

@@ -121,6 +162,10 @@ def default_aggregation(self):
121162
""" The default type of aggregation on the field (e.g Sum, Avg)"""
122163
return self._aggregation
123164

165+
@property
166+
def worksheets(self):
167+
return list(self._worksheets)
168+
124169
######################################
125170
# Special Case handling methods for reading the values from the XML
126171
######################################

tableaudocumentapi/workbook.py

Lines changed: 52 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,12 @@
55
###############################################################################
66
import os
77
import zipfile
8+
import weakref
89

910
import xml.etree.ElementTree as ET
1011

1112
from tableaudocumentapi import Datasource, xfile
1213

13-
###########################################################################
14-
#
15-
# Utility Functions
16-
#
17-
###########################################################################
18-
1914

2015
class Workbook(object):
2116
"""
@@ -33,6 +28,7 @@ def __init__(self, filename):
3328
Constructor.
3429
3530
"""
31+
3632
self._filename = filename
3733

3834
# Determine if this is a twb or twbx and get the xml root
@@ -47,13 +43,26 @@ def __init__(self, filename):
4743
self._datasources = self._prepare_datasources(
4844
self._workbookRoot) # self.workbookRoot.find('datasources')
4945

46+
self._datasource_index = self._prepare_datasource_index(self._datasources)
47+
48+
self._worksheets = self._prepare_worksheets(
49+
self._workbookRoot, self._datasource_index
50+
)
51+
5052
###########
5153
# datasources
5254
###########
5355
@property
5456
def datasources(self):
5557
return self._datasources
5658

59+
###########
60+
# worksheets
61+
###########
62+
@property
63+
def worksheets(self):
64+
return self._worksheets
65+
5766
###########
5867
# filename
5968
###########
@@ -95,12 +104,47 @@ def save_as(self, new_filename):
95104
# Private API.
96105
#
97106
###########################################################################
98-
def _prepare_datasources(self, xmlRoot):
107+
@staticmethod
108+
def _prepare_datasource_index(datasources):
109+
retval = weakref.WeakValueDictionary()
110+
for datasource in datasources:
111+
retval[datasource.name] = datasource
112+
113+
return retval
114+
115+
@staticmethod
116+
def _prepare_datasources(xml_root):
99117
datasources = []
100118

101119
# loop through our datasources and append
102-
for datasource in xmlRoot.find('datasources'):
120+
datasource_elements = xml_root.find('datasources')
121+
if datasource_elements is None:
122+
return []
123+
124+
for datasource in datasource_elements:
103125
ds = Datasource(datasource)
104126
datasources.append(ds)
105127

106128
return datasources
129+
130+
@staticmethod
131+
def _prepare_worksheets(xml_root, ds_index):
132+
worksheets = []
133+
worksheets_element = xml_root.find('.//worksheets')
134+
if worksheets_element is None:
135+
return worksheets
136+
137+
for worksheet_element in worksheets_element:
138+
worksheet_name = worksheet_element.attrib['name']
139+
worksheets.append(worksheet_name) # TODO: A real worksheet object, for now, only name
140+
141+
dependencies = worksheet_element.findall('.//datasource-dependencies')
142+
143+
for dependency in dependencies:
144+
datasource_name = dependency.attrib['datasource']
145+
datasource = ds_index[datasource_name]
146+
for column in dependency.findall('.//column'):
147+
column_name = column.attrib['name']
148+
datasource.fields[column_name].add_used_in(worksheet_name)
149+
150+
return worksheets

test/assets/TABLEAU_10_TWB.twb

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,22 @@
1-
<?xml version='1.0' encoding='utf-8' ?><workbook source-build='0.0.0 (0000.16.0510.1300)' source-platform='mac' version='10.0' xmlns:user='http://www.tableausoftware.com/xml/user'><datasources><datasource caption='xy+ (Multiple Connections)' inline='true' name='federated.1s4nxn20cywkdv13ql0yk0g1mpdx' version='10.0'><connection class='federated'><named-connections><named-connection caption='mysql55.test.tsi.lan' name='mysql.1ewmkrw0mtgsev1dnurma1blii4x'><connection class='mysql' dbname='testv1' odbc-native-protocol='yes' port='3306' server='mysql55.test.tsi.lan' source-charset='' username='test' /></named-connection><named-connection caption='mssql2012.test.tsi.lan' name='sqlserver.1erdwp01uqynlb14ul78p0haai2r'><connection authentication='sqlserver' class='sqlserver' dbname='TestV1' odbc-native-protocol='yes' one-time-sql='' server='mssql2012.test.tsi.lan' username='test' /></named-connection></named-connections></connection></datasource></datasources></workbook>
1+
<?xml version='1.0' encoding='utf-8' ?>
2+
<workbook source-build='0.0.0 (0000.16.0510.1300)' source-platform='mac' version='10.0'
3+
xmlns:user='http://www.tableausoftware.com/xml/user'>
4+
<datasources>
5+
<datasource caption='xy+ (Multiple Connections)' inline='true' name='federated.1s4nxn20cywkdv13ql0yk0g1mpdx'
6+
version='10.0'>
7+
<connection class='federated'>
8+
<named-connections>
9+
<named-connection caption='mysql55.test.tsi.lan' name='mysql.1ewmkrw0mtgsev1dnurma1blii4x'>
10+
<connection class='mysql' dbname='testv1' odbc-native-protocol='yes' port='3306'
11+
server='mysql55.test.tsi.lan' source-charset='' username='test'/>
12+
</named-connection>
13+
<named-connection caption='mssql2012.test.tsi.lan' name='sqlserver.1erdwp01uqynlb14ul78p0haai2r'>
14+
<connection authentication='sqlserver' class='sqlserver' dbname='TestV1'
15+
odbc-native-protocol='yes' one-time-sql='' server='mssql2012.test.tsi.lan'
16+
username='test'/>
17+
</named-connection>
18+
</named-connections>
19+
</connection>
20+
</datasource>
21+
</datasources>
22+
</workbook>

0 commit comments

Comments
 (0)
0