Initial attempt at enabling reading the columns from the datasource (… · csweezy/document-api-python@481f38c · GitHub
[go: up one dir, main page]

Skip to content

Commit 481f38c

Browse files
author
Russell Hay
authored
Initial attempt at enabling reading the columns from the datasource (tableau#45)
Fixes tableau#42 tableau#46 * Initial attempt at enabling reading the columns from the datasource * Fixing pep8 errors for EOFEOL * Changing to OrderedDict for getting columns * Add documentation for the various column attributes * rename column to field * Fixed tableau#46 encode apostrophes in field names * Enable multilook up for Fields * Rename properties on the field based on feedback given in tableau#45
1 parent aba2a35 commit 481f38c

File tree

9 files changed

+337
-4
lines changed

9 files changed

+337
-4
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name='tableaudocumentapi',
8-
version='0.1.0-dev',
8+
version='0.1.0.dev0',
99
author='Tableau Software',
1010
author_email='github@tableau.com',
1111
url='https://github.com/tableau/document-api-python',

tableaudocumentapi/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
from .field import Field
12
from .connection import Connection
23
from .datasource import Datasource, ConnectionParser
34
from .workbook import Workbook
5+
46
__version__ = '0.0.1'
57
__VERSION__ = __version__

tableaudocumentapi/datasource.py

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,29 @@
33
# Datasource - A class for writing datasources to Tableau files
44
#
55
###############################################################################
6-
import os
6+
import collections
7+
import xml.etree.ElementTree as ET
8+
import xml.sax.saxutils as sax
79
import zipfile
810

9-
import xml.etree.ElementTree as ET
1011
from tableaudocumentapi import Connection, xfile
12+
from tableaudocumentapi import Field
13+
from tableaudocumentapi.multilookup_dict import MultiLookupDict
1114

1215

13-
class ConnectionParser(object):
16+
def _mapping_from_xml(root_xml, column_xml):
    """Build the ``(id, Field)`` pair for a single ``<column>`` element.

    The field is created from ``column_xml``.  If ``root_xml`` contains a
    ``<metadata-record class='column'>`` with a ``<local-name>`` child whose
    text equals the field id, that record is applied to the field.

    :param root_xml: tree (or element) searched for metadata records.
    :param column_xml: the ``<column>`` element describing the field.
    :return: tuple of ``(field.id, field)``.
    """
    field = Field.from_xml(column_xml)

    # Compare the <local-name> text in Python instead of splicing the id into
    # an XPath predicate: an id containing an apostrophe (e.g.
    # "[Today's Date]") cannot be embedded in a single-quoted predicate.
    for record in root_xml.iterfind(".//metadata-record[@class='column']"):
        if any(''.join(child.itertext()) == field.id
               for child in record.findall('local-name')):
            field.apply_metadata(record)
            break

    return field.id, field
1426

27+
28+
class ConnectionParser(object):
1529
def __init__(self, datasource_xml, version):
1630
self._dsxml = datasource_xml
1731
self._dsversion = version
@@ -55,6 +69,7 @@ def __init__(self, dsxml, filename=None):
5569
self._connection_parser = ConnectionParser(
5670
self._datasourceXML, version=self._version)
5771
self._connections = self._connection_parser.get_connections()
72+
self._fields = None
5873

5974
@classmethod
6075
def from_file(cls, filename):
@@ -115,3 +130,17 @@ def version(self):
115130
@property
116131
def connections(self):
117132
return self._connections
133+
134+
###########
135+
# fields
136+
###########
137+
@property
138+
def fields(self):
139+
if not self._fields:
140+
self._fields = self._get_all_fields()
141+
return self._fields
142+
143+
def _get_all_fields(self):
    """Collect every ``<column>`` element of the datasource tree.

    :return: a ``MultiLookupDict`` mapping field id to its ``Field``.
    """
    fields_by_id = {}
    for column_xml in self._datasourceTree.findall('.//column'):
        field_id, field = _mapping_from_xml(self._datasourceTree, column_xml)
        fields_by_id[field_id] = field
    return MultiLookupDict(fields_by_id)

tableaudocumentapi/field.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import functools
2+
3+
_ATTRIBUTES = [
4+
'id', # Name of the field as specified in the file, usually surrounded by [ ]
5+
'caption', # Name of the field as displayed in Tableau unless an alias is defined
6+
'datatype', # Type of the field within Tableau (string, integer, etc)
7+
'role', # Dimension or Measure
8+
'type', # three possible values: quantitative, ordinal, or nominal
9+
'alias', # Name of the field as displayed in Tableau if the default name isn't wanted
10+
'calculation', # If this field is a calculated field, this will be the formula
11+
]
12+
13+
_METADATA_ATTRIBUTES = [
14+
'aggregation', # The type of aggregation on the field (e.g Sum, Avg)
15+
]
16+
17+
18+
def _find_metadata_record(record, attrib):
19+
element = record.find('.//{}'.format(attrib))
20+
if element is None:
21+
return None
22+
return element.text
23+
24+
25+
class Field(object):
    """A single field (column) of a datasource.

    Every name in ``_ATTRIBUTES`` is read from the column XML when the object
    is created and stored as ``_<name>``.  Every name in
    ``_METADATA_ATTRIBUTES`` starts as ``None`` and is filled in by
    ``apply_metadata``.  Both groups are exposed read-only through
    ``__getattr__``.
    """

    def __init__(self, xmldata):
        def read_xml_attribute(attribute_name):
            return xmldata.attrib.get(attribute_name, None)

        for attribute_name in _ATTRIBUTES:
            self._apply_attribute(xmldata, attribute_name, read_xml_attribute)

        # Metadata is only known once apply_metadata() is called.
        for attribute_name in _METADATA_ATTRIBUTES:
            setattr(self, '_{}'.format(attribute_name), None)

    def apply_metadata(self, metadata_record):
        """Fill the metadata attributes from a metadata-record element."""
        def read_child_text(attribute_name):
            return _find_metadata_record(metadata_record, attribute_name)

        for attribute_name in _METADATA_ATTRIBUTES:
            self._apply_attribute(metadata_record, attribute_name, read_child_text)

    @classmethod
    def from_xml(cls, xmldata):
        """Alternate constructor: build a field from a column element."""
        return cls(xmldata)

    def __getattr__(self, item):
        # Only reached when normal lookup fails; map the public attribute
        # names onto their underscore-prefixed storage.
        if item not in _ATTRIBUTES and item not in _METADATA_ATTRIBUTES:
            raise AttributeError(item)
        return getattr(self, '_{}'.format(item))

    def _apply_attribute(self, xmldata, attrib, default_func):
        """Store ``_<attrib>``, preferring a ``_read_<attrib>`` reader if defined."""
        reader = getattr(self, '_read_{}'.format(attrib), None)
        if reader is not None:
            value = reader(xmldata)
        else:
            value = default_func(attrib)

        setattr(self, '_{}'.format(attrib), value)

    @property
    def name(self):
        """ Provides a nice name for the field which is derived from the alias, caption, or the id.

        The alias wins if it is set, otherwise the caption, and finally the id
        (the underlying name) when neither of the others is set. """
        return getattr(self, 'alias', None) or getattr(self, 'caption', None) or self.id

    ######################################
    # Special Case handling methods for reading the values from the XML
    ######################################
    @staticmethod
    def _read_id(xmldata):
        # The XML calls this attribute 'name'; it is exposed as 'id' so that
        # 'name' can be the friendly display name.
        return xmldata.attrib.get('name', None)

    @staticmethod
    def _read_calculation(xmldata):
        # The formula lives on a <calculation> descendant rather than on the
        # column element itself.
        calculation = xmldata.find('.//calculation')
        if calculation is not None:
            return calculation.attrib.get('formula', None)
        return None
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
def _resolve_value(key, value):
2+
try:
3+
retval = value.get(key, None)
4+
if retval is None:
5+
retval = value.getattr(key, None)
6+
except AttributeError:
7+
retval = None
8+
return retval
9+
10+
11+
def _build_index(key, d):
    """Build a reverse index from each value's ``key`` property to its dict key.

    :param key: property name resolved on every value via ``_resolve_value``.
    :param d: mapping whose values are inspected.
    :return: dict mapping each resolved (non-``None``) property to the key of
        the entry it came from; later entries win on duplicates.
    """
    index = {}
    for dict_key, value in d.items():
        # Resolve once per entry and reuse the result for filter and key.
        resolved = _resolve_value(key, value)
        if resolved is not None:
            index[resolved] = dict_key
    return index
15+
16+
17+
# TODO: Improve this to be more generic
18+
class MultiLookupDict(dict):
    """A dict whose items can also be fetched by their alias or caption.

    Items are stored under their primary key.  Two secondary indexes map each
    value's ``alias`` and ``caption`` (resolved with ``_resolve_value``) back
    to that primary key.  ``__getitem__`` and ``get`` consult the alias index
    first, then the caption index, then fall back to the primary key.
    """

    def __init__(self, args=None):
        if args is None:
            args = {}
        super(MultiLookupDict, self).__init__(args)
        self._indexes = {
            'alias': {},
            'caption': {}
        }
        self._populate_indexes()

    def _populate_indexes(self):
        """Rebuild both secondary indexes from the current contents."""
        self._indexes['alias'] = _build_index('alias', self)
        self._indexes['caption'] = _build_index('caption', self)

    def _forget_key(self, key):
        """Remove every secondary-index entry that points at ``key``."""
        for index in self._indexes.values():
            stale = [name for name, target in index.items() if target == key]
            for name in stale:
                del index[name]

    def __setitem__(self, key, value):
        # Replacing an item must not leave the old alias/caption resolving
        # to the new value.
        if dict.__contains__(self, key):
            self._forget_key(key)

        alias = _resolve_value('alias', value)
        caption = _resolve_value('caption', value)
        if alias is not None:
            self._indexes['alias'][alias] = key
        if caption is not None:
            self._indexes['caption'][caption] = key

        dict.__setitem__(self, key, value)

    def __getitem__(self, key):
        if key in self._indexes['alias']:
            key = self._indexes['alias'][key]
        elif key in self._indexes['caption']:
            key = self._indexes['caption'][key]

        return dict.__getitem__(self, key)

    def get(self, key, default=None):
        """Like ``dict.get`` but honouring alias and caption lookups.

        The inherited ``dict.get`` does not go through ``__getitem__``, so
        without this override ``get`` would only ever see primary keys.
        """
        try:
            return self[key]
        except KeyError:
            return default

test/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from . import bvt
2+
from . import test_datasource

test/assets/datasource_test.tds

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
<?xml version='1.0' encoding='utf-8' ?>
2+
<datasource formatted-name='postgres.1of3kl00aoax5d1a1ejma1397430' inline='true' source-platform='mac' version='9.3' xmlns:user='http://www.tableausoftware.com/xml/user'>
3+
<connection authentication='username-password' class='postgres' dbname='TestV1' odbc-native-protocol='yes' port='5432' server='postgres91.test.tsi.lan' username='test'>
4+
<relation name='xy' table='[public].[xy]' type='table' />
5+
<metadata-records>
6+
<metadata-record class='column'>
7+
<remote-name>a</remote-name>
8+
<remote-type>130</remote-type>
9+
<local-name>[a]</local-name>
10+
<parent-name>[xy]</parent-name>
11+
<remote-alias>a</remote-alias>
12+
<ordinal>1</ordinal>
13+
<local-type>string</local-type>
14+
<aggregation>Count</aggregation>
15+
<width>255</width>
16+
<contains-null>true</contains-null>
17+
<attributes>
18+
<attribute datatype='string' name='DebugRemoteType'>&quot;SQL_WVARCHAR&quot;</attribute>
19+
<attribute datatype='string' name='DebugWireType'>&quot;SQL_C_WCHAR&quot;</attribute>
20+
<attribute datatype='string' name='TypeIsVarchar'>&quot;true&quot;</attribute>
21+
</attributes>
22+
</metadata-record>
23+
<metadata-record class='column'>
24+
<remote-name>Today's Date</remote-name>
25+
<remote-type>130</remote-type>
26+
<local-name>[Today&apos;s Date]</local-name>
27+
<parent-name>[xy]</parent-name>
28+
<remote-alias>a</remote-alias>
29+
<ordinal>1</ordinal>
30+
<local-type>string</local-type>
31+
<aggregation>Count</aggregation>
32+
<width>255</width>
33+
<contains-null>true</contains-null>
34+
<attributes>
35+
<attribute datatype='string' name='DebugRemoteType'>&quot;SQL_WVARCHAR&quot;</attribute>
36+
<attribute datatype='string' name='DebugWireType'>&quot;SQL_C_WCHAR&quot;</attribute>
37+
<attribute datatype='string' name='TypeIsVarchar'>&quot;true&quot;</attribute>
38+
</attributes>
39+
</metadata-record>
40+
<metadata-record class='column'>
41+
<remote-name>x</remote-name>
42+
<remote-type>3</remote-type>
43+
<local-name>[x]</local-name>
44+
<parent-name>[xy]</parent-name>
45+
<remote-alias>x</remote-alias>
46+
<ordinal>2</ordinal>
47+
<local-type>integer</local-type>
48+
<aggregation>Sum</aggregation>
49+
<precision>10</precision>
50+
<contains-null>true</contains-null>
51+
<attributes>
52+
<attribute datatype='string' name='DebugRemoteType'>&quot;SQL_INTEGER&quot;</attribute>
53+
<attribute datatype='string' name='DebugWireType'>&quot;SQL_C_SLONG&quot;</attribute>
54+
</attributes>
55+
</metadata-record>
56+
<metadata-record class='column'>
57+
<remote-name>y</remote-name>
58+
<remote-type>3</remote-type>
59+
<local-name>[y]</local-name>
60+
<parent-name>[xy]</parent-name>
61+
<remote-alias>y</remote-alias>
62+
<ordinal>3</ordinal>
63+
<local-type>integer</local-type>
64+
<aggregation>Sum</aggregation>
65+
<precision>10</precision>
66+
<contains-null>true</contains-null>
67+
<attributes>
68+
<attribute datatype='string' name='DebugRemoteType'>&quot;SQL_INTEGER&quot;</attribute>
69+
<attribute datatype='string' name='DebugWireType'>&quot;SQL_C_SLONG&quot;</attribute>
70+
</attributes>
71+
</metadata-record>
72+
</metadata-records>
73+
</connection>
74+
<aliases enabled='yes' />
75+
<column datatype='integer' name='[Number of Records]' role='measure' type='quantitative' user:auto-column='numrec'>
76+
<calculation class='tableau' formula='1' />
77+
</column>
78+
<column caption='A' datatype='string' name='[a]' role='dimension' type='nominal' />
79+
<column caption='Today&apos;s Date' datatype='string' name='[Today&apos;s Date]' role='dimension' type='nominal' />
80+
<column caption='X' datatype='integer' name='[x]' role='measure' type='quantitative' />
81+
<column caption='Y' datatype='integer' name='[y]' role='measure' type='quantitative' />
82+
<layout dim-ordering='alphabetic' dim-percentage='0.5' measure-ordering='alphabetic' measure-percentage='0.5' show-structure='true' />
83+
<semantic-values>
84+
<semantic-value key='[Country].[Name]' value='&quot;United States&quot;' />
85+
</semantic-values>
86+
</datasource>

test/test_datasource.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import unittest
2+
import os.path
3+
import functools
4+
5+
from tableaudocumentapi import Datasource
6+
7+
TEST_TDS_FILE = os.path.join(
8+
os.path.dirname(__file__),
9+
'assets',
10+
'datasource_test.tds'
11+
)
12+
13+
14+
class DataSourceFields(unittest.TestCase):
    """Field-parsing checks run against the datasource at TEST_TDS_FILE."""

    def setUp(self):
        self.ds = Datasource.from_file(TEST_TDS_FILE)

    def test_datasource_returns_correct_fields(self):
        fields = self.ds.fields
        self.assertIsNotNone(fields)
        self.assertIsNotNone(fields.get('[Number of Records]', None))

    def test_datasource_returns_calculation_from_fields(self):
        number_of_records = self.ds.fields['[Number of Records]']
        self.assertEqual('1', number_of_records.calculation)

    def test_datasource_uses_metadata_record(self):
        x_field = self.ds.fields['[x]']
        self.assertEqual('Sum', x_field.aggregation)

    def test_datasource_column_name_contains_apostrophy(self):
        field = self.ds.fields.get("[Today's Date]", None)
        self.assertIsNotNone(field)

test/test_multidict.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import unittest
2+
import os.path
3+
import functools
4+
5+
from tableaudocumentapi.multilookup_dict import MultiLookupDict
6+
7+
8+
class MLDTests(unittest.TestCase):
    """Lookup-precedence checks for MultiLookupDict.

    Fixture: '[foo]' has alias 'bar' and caption 'baz'; '[bar]' has only the
    caption 'foo'; '[baz]' has neither.
    """

    def setUp(self):
        with_alias_and_caption = {'alias': 'bar', 'caption': 'baz', 'value': 1}
        with_caption_only = {'caption': 'foo', 'value': 2}
        with_id_only = {'value': 3}
        self.mld = MultiLookupDict({
            '[foo]': with_alias_and_caption,
            '[bar]': with_caption_only,
            '[baz]': with_id_only,
        })

    def _value_for(self, key):
        # Shared helper: look the key up and return the stored 'value'.
        return self.mld[key]['value']

    def test_multilookupdict_name_only(self):
        self.assertEqual(3, self._value_for('[baz]'))

    def test_multilookupdict_alias_overrides_everything(self):
        self.assertEqual(1, self._value_for('bar'))

    def test_mutlilookupdict_caption_overrides_id(self):
        self.assertEqual(2, self._value_for('foo'))

    def test_mutlilookupdict_can_still_find_id_even_with_alias(self):
        self.assertEqual(1, self._value_for('[foo]'))

    def test_mutlilookupdict_can_still_find_caption_even_with_alias(self):
        self.assertEqual(1, self._value_for('baz'))

    def test_mutlilookupdict_can_still_find_id_even_with_caption(self):
        self.assertEqual(2, self._value_for('[bar]'))

0 commit comments

Comments
 (0)
0