Add German Steuernummer · AmarisAI/python-stdnum@647dfea · GitHub
[go: up one dir, main page]

Skip to content

Commit 647dfea

Browse files
committed
Add German Steuernummer
Based on the implementation provided by Mohammed Salman of Holvi. This is the old tax number that is being replaced by the Steuerliche Identifikationsnummer. The number has a regional form (which is used most often) and a national form. Closes arthurdejong#49
1 parent 6e30cf5 commit 647dfea

File tree

2 files changed

+392
-0
lines changed

2 files changed

+392
-0
lines changed

stdnum/de/stnr.py

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
# steuernummer.py - functions for handling German tax numbers
2+
# coding: utf-8
3+
#
4+
# Copyright (C) 2017 Holvi Payment Services
5+
# Copyright (C) 2018 Arthur de Jong
6+
#
7+
# This library is free software; you can redistribute it and/or
8+
# modify it under the terms of the GNU Lesser General Public
9+
# License as published by the Free Software Foundation; either
10+
# version 2.1 of the License, or (at your option) any later version.
11+
#
12+
# This library is distributed in the hope that it will be useful,
13+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15+
# Lesser General Public License for more details.
16+
#
17+
# You should have received a copy of the GNU Lesser General Public
18+
# License along with this library; if not, write to the Free Software
19+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20+
# 02110-1301 USA
21+
22+
"""St.-Nr. (Steuernummer, German tax number).
23+
24+
The Steuernummer (St.-Nr.) is a tax number assigned by regional tax offices
25+
to taxable individuals and organisations. The number is being replaced by the
26+
Steuerliche Identifikationsnummer (IdNr).
27+
28+
The number has 10 or 11 digits for the regional form (per Bundesland) and 13
29+
digits for the number that is unique within Germany. The number consists of
30+
(part of) the Bundesfinanzamtsnummer (BUFA-Nr.), a district number, a serial
31+
number and a check digit.
32+
33+
More information:
34+
35+
* https://de.wikipedia.org/wiki/Steuernummer
36+
37+
>>> validate(' 181/815/0815 5')
38+
'18181508155'
39+
>>> validate('201/123/12340', 'Sachsen')
40+
'20112312340'
41+
>>> validate('4151081508156', 'Thuringen')
42+
'4151081508156'
43+
>>> validate('4151181508156', 'Thuringen')
44+
Traceback (most recent call last):
45+
...
46+
InvalidFormat: ...
47+
>>> validate('136695978')
48+
Traceback (most recent call last):
49+
...
50+
InvalidLength: ...
51+
"""
52+
53+
import re
54+
55+
from stdnum.exceptions import *
56+
from stdnum.util import clean
57+
58+
59+
# The number formats per region, as [regional format, country-wide format].
# A run of the same placeholder letter stands for that many digits; any
# literal digit in a format has to appear verbatim in the number.
# Going by the module description the placeholders presumably are:
# F = (part of) the tax office number (BUFA-Nr.), B = district number,
# U = serial number, P = check digit.
_number_formats_per_region = {
    'Baden-Württemberg': ['FFBBBUUUUP', '28FF0BBBUUUUP'],
    'Bayern': ['FFFBBBUUUUP', '9FFF0BBBUUUUP'],
    'Berlin': ['FFBBBUUUUP', '11FF0BBBUUUUP'],
    'Brandenburg': ['0FFBBBUUUUP', '30FF0BBBUUUUP'],
    'Bremen': ['FFBBBUUUUP', '24FF0BBBUUUUP'],
    'Hamburg': ['FFBBBUUUUP', '22FF0BBBUUUUP'],
    'Hessen': ['0FFBBBUUUUP', '26FF0BBBUUUUP'],
    'Mecklenburg-Vorpommern': ['0FFBBBUUUUP', '40FF0BBBUUUUP'],
    'Niedersachsen': ['FFBBBUUUUP', '23FF0BBBUUUUP'],
    'Nordrhein-Westfalen': ['FFFBBBBUUUP', '5FFF0BBBBUUUP'],
    'Rheinland-Pfalz': ['FFBBBUUUUP', '27FF0BBBUUUUP'],
    'Saarland': ['0FFBBBUUUUP', '10FF0BBBUUUUP'],
    'Sachsen': ['2FFBBBUUUUP', '32FF0BBBUUUUP'],
    'Sachsen-Anhalt': ['1FFBBBUUUUP', '31FF0BBBUUUUP'],
    'Schleswig-Holstein': ['FFBBBUUUUP', '21FF0BBBUUUUP'],
    'Thüringen': ['1FFBBBUUUUP', '41FF0BBBUUUUP'],
}
78+
79+
# Computed here, before the mapping is re-keyed by cleaned region name
# further down, so that this list holds the human-readable region names.
REGIONS = sorted(_number_formats_per_region)
"""Valid regions recognised by this module."""
81+
82+
83+
def _clean_region(region):
    """Convert the region name to something that we can use for comparison
    without running into encoding issues.

    The name is lower-cased and every character that is not in the
    allowed alphabet is dropped (spaces, dashes, non-ASCII characters).
    """
    # NOTE: 'u' is intentionally absent from the alphabet. Dropping it
    # together with any non-ASCII character makes the umlaut spelling and
    # the ASCII spelling of a region (e.g. 'Thüringen' and 'Thuringen')
    # clean to the same key.
    allowed = 'abcdefghijklmnopqrstvwxyz'
    return ''.join(x for x in region.lower() if x in allowed)
89+
90+
91+
class _Format(object):
    """A number format such as 'FFBBBUUUUP' or '28FF0BBBUUUUP'.

    Each run of one of the placeholder letters F, B, U or P stands for a
    group of that many digits; every other character in the format is
    taken literally.
    """

    # Matches one run of a repeated placeholder letter (e.g. 'BBB').
    _placeholder_re = re.compile(r'([FBUP])\1*')

    def __init__(self, fmt):
        """Compile the format string into a regular expression with one
        capturing group per placeholder run."""
        self._fmt = fmt
        self._re = re.compile('^%s$' % self._placeholder_re.sub(
            lambda x: r'(\d{%d})' % len(x.group(0)), fmt))

    def match(self, number):
        """Return a match object if the number fits this format (None
        otherwise). The groups of the match are the digit groups in the
        order the placeholder runs appear in the format."""
        return self._re.match(number)

    def replace(self, f, b, u, p):
        """Return the format with its placeholder runs replaced, in order
        of appearance, by the strings f, b, u and p. Literal characters of
        the format are kept."""
        items = iter([f, b, u, p])
        return self._placeholder_re.sub(lambda x: next(items), self._fmt)
105+
106+
107+
# Convert the structure to something that we can easily use: the mapping is
# re-keyed by the cleaned region name and every value becomes
# [original region name, regional _Format, country-wide _Format].
_number_formats_per_region = dict(
    (_clean_region(region), [
        region, _Format(formats[0]), _Format(formats[1])])
    for region, formats in _number_formats_per_region.items())
112+
113+
114+
def _get_formats(region=None):
    """Return the formats for the region.

    The result is a list of [region name, regional format, country-wide
    format] entries: a single entry if a region is given, otherwise the
    entries of all known regions. Raises InvalidComponent if the region
    is not known.
    """
    if region:
        region = _clean_region(region)
        if region not in _number_formats_per_region:
            raise InvalidComponent()
        return [_number_formats_per_region[region]]
    # Return a real list so that both branches have the same return type.
    return list(_number_formats_per_region.values())
122+
123+
124+
def compact(number):
    """Convert the number to the minimal representation. This strips the
    number of any valid separators and removes surrounding whitespace."""
    return clean(number, ' -./,').strip()
128+
129+
130+
def validate(number, region=None):
    """Check if the number is a valid tax number. This checks the length and
    formatting. The region can be supplied to verify that the number is
    assigned in that region.

    Returns the compact representation of the number. Raises InvalidFormat
    if the number contains non-digits or matches none of the candidate
    formats, InvalidLength if it does not have 10, 11 or 13 digits and
    InvalidComponent if the region is not known.
    """
    number = compact(number)
    if not number.isdigit():
        raise InvalidFormat()
    if len(number) not in (10, 11, 13):
        raise InvalidLength()
    # Either the regional or the country-wide format of a candidate region
    # has to match.
    if not any(region_fmt.match(number) or country_fmt.match(number)
               for _, region_fmt, country_fmt in _get_formats(region)):
        raise InvalidFormat()
    return number
143+
144+
145+
def is_valid(number, region=None):
    """Check if the number is a valid tax number. This checks the length and
    formatting. The region can be supplied to verify that the number is
    assigned in that region.

    Returns True or False instead of raising a ValidationError.
    """
    try:
        return bool(validate(number, region))
    except ValidationError:
        return False
153+
154+
155+
def guess_regions(number):
    """Return a list of regions this number is valid for.

    The list is sorted by region name and is empty if the number matches
    neither the regional nor the country-wide format of any region.
    """
    number = compact(number)
    return sorted(
        region for region, region_fmt, country_fmt in _get_formats()
        if region_fmt.match(number) or country_fmt.match(number))
161+
162+
163+
def to_regional_number(number):
    """Convert the number to a regional (10 or 11 digit) number.

    The number has to be in the country-wide format of one of the regions,
    otherwise InvalidFormat is raised.
    """
    number = compact(number)
    for _, region_fmt, country_fmt in _get_formats():
        m = country_fmt.match(number)
        if m:
            # Re-use the matched digit groups in the regional layout.
            return region_fmt.replace(*m.groups())
    raise InvalidFormat()
171+
172+
173+
def to_country_number(number, region=None):
    """Convert the number to the nationally unique number. The region is
    needed if the number is not only valid for one particular region.

    Raises InvalidFormat if the number matches no regional format and
    InvalidComponent if it matches the regional format of more than one
    region (or if the supplied region is not known).
    """
    number = compact(number)
    # Collect every region whose regional format fits the number.
    candidates = []
    for _, region_fmt, country_fmt in _get_formats(region):
        region_match = region_fmt.match(number)
        if region_match:
            candidates.append((region_match, country_fmt))
    if not candidates:
        raise InvalidFormat()
    if len(candidates) != 1:
        # Ambiguous: the caller has to tell us which region is meant.
        raise InvalidComponent()
    region_match, country_fmt = candidates[0]
    return country_fmt.replace(*region_match.groups())
189+
190+
191+
def format(number, region=None):
    """Reformat the passed number to the standard format.

    Regional numbers get a slash after the first and after the second
    digit group. Numbers that match no regional format of the candidate
    regions (for example 13-digit country-wide numbers) are returned in
    compact form. Without a region the first regional format that matches
    is used, so pass the region when the grouping is ambiguous.
    """
    number = compact(number)
    for _, region_fmt, _country_fmt in _get_formats(region):
        m = region_fmt.match(number)
        if m:
            f, b, u, p = m.groups()
            return region_fmt.replace(f + '/', b + '/', u, p)
    return number

tests/test_de_stnr.doctest

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
test_de_stnr.doctest - more detailed doctests for the stdnum.de.stnr module
2+
3+
Copyright (C) 2017 Holvi Payment Services
4+
Copyright (C) 2018 Arthur de Jong
5+
6+
This library is free software; you can redistribute it and/or
7+
modify it under the terms of the GNU Lesser General Public
8+
License as published by the Free Software Foundation; either
9+
version 2.1 of the License, or (at your option) any later version.
10+
11+
This library is distributed in the hope that it will be useful,
12+
but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
Lesser General Public License for more details.
15+
16+
You should have received a copy of the GNU Lesser General Public
17+
License along with this library; if not, write to the Free Software
18+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19+
02110-1301 USA
20+
21+
22+
This file contains more detailed doctests for the stdnum.de.stnr module. It
23+
tries to validate a number of numbers that have been found online.
24+
25+
>>> from stdnum.de import stnr
26+
27+
28+
Some simple tests.
29+
30+
>>> stnr.validate('1123456789')
31+
'1123456789'
32+
>>> stnr.validate('1123456789', 'Berlin')
33+
'1123456789'
34+
>>> stnr.validate('12234567890', 'Berlin')
35+
Traceback (most recent call last):
36+
...
37+
InvalidFormat: ...
38+
>>> stnr.validate('1123456789', 'Unknown region')
39+
Traceback (most recent call last):
40+
...
41+
InvalidComponent: ...
42+
>>> stnr.validate('1234567890') # 10-digit number
43+
'1234567890'
44+
>>> stnr.validate('12345678901') # 11-digit number
45+
'12345678901'
46+
>>> stnr.validate('1123045678901') # 13-digit number
47+
'1123045678901'
48+
>>> stnr.validate('123456789') # short number
49+
Traceback (most recent call last):
50+
...
51+
InvalidLength: ...
52+
53+
54+
The module should handle various encodings of region names properly.
55+
56+
>>> stnr.validate('9381508152', u'Baden-W\xfcrttemberg') # Python unicode
57+
'9381508152'
58+
>>> stnr.validate('9381508152', 'Baden-W\xc3\xbcrttemberg') # UTF-8
59+
'9381508152'
60+
>>> stnr.validate('9381508152', 'Baden-W\xfcrttemberg') # ISO-8859-15
61+
'9381508152'
62+
>>> stnr.validate('9381508152', 'Baden Wurttemberg') # ASCII with space
63+
'9381508152'
64+
65+
66+
Given a number we are able to find a region.
67+
68+
>>> stnr.guess_regions('1123045678901') # 13-digit number
69+
['Berlin']
70+
>>> stnr.guess_regions('98765432101') # 11-digit number
71+
['Bayern', 'Nordrhein-Westfalen']
72+
>>> stnr.guess_regions('123') # invalid number
73+
[]
74+
75+
76+
We can convert the 13-digit country number to a regional number without
77+
issues. We can also convert it back if we know the region.
78+
79+
>>> stnr.guess_regions('2475081508152')
80+
['Bremen']
81+
>>> stnr.to_regional_number('2475081508152')
82+
'7581508152'
83+
>>> stnr.validate('7581508152', 'Bremen')
84+
'7581508152'
85+
>>> stnr.to_regional_number('123')
86+
Traceback (most recent call last):
87+
...
88+
InvalidFormat: ...
89+
>>> stnr.to_country_number('7581508152', 'Bremen')
90+
'2475081508152'
91+
>>> stnr.to_country_number('7581508152') # not unique, need region
92+
Traceback (most recent call last):
93+
...
94+
InvalidComponent: ...
95+
>>> stnr.to_country_number('123')
96+
Traceback (most recent call last):
97+
...
98+
InvalidFormat: ...
99+
100+
101+
We can also format numbers by separating the groups with slashes. This is
102+
most often seen for regional numbers and the 13-digit numbers don't get the
103+
slashes.
104+
105+
>>> stnr.format('18181508155', 'Bayern')
106+
'181/815/08155'
107+
>>> stnr.format('18181508155', 'Nordrhein-Westfalen')
108+
'181/8150/8155'
109+
>>> stnr.format('2181508150')
110+
'21/815/08150'
111+
>>> stnr.format('156 / 141 / 14808', 'Thuringen')
112+
'156/141/14808'
113+
>>> stnr.format('2893081508152') # 13-digit number
114+
'2893081508152'
115+
>>> stnr.format('123') # unknown format
116+
'123'
117+
118+
119+
These have been found online and should all be valid numbers.
120+
121+
>>> numbers = '''
122+
...
123+
... 010/815/08182
124+
... 013 815 08153
125+
... 02/815/08156
126+
... 04 522 658 002
127+
... 042/213/02423
128+
... 048/815/08155
129+
... 079/815/08151
130+
... 101/5761/1744
131+
... 101/815/08154
132+
... 1010081508182
133+
... 1121081508150
134+
... 116/5701/1448
135+
... 123/456/7890
136+
... 133/5909/3295
137+
... 133/8150/8159
138+
... 14044/00050
139+
... 143/317/22090
140+
... 147/276/80579
141+
... 151/815/08156
142+
... 156 / 141 / 14808
143+
... 162/107/03482
144+
... 181/815/08155
145+
... 1929008636
146+
... 201/123/12340
147+
... 201/5902/3626
148+
... 201/5906/3686
149+
... 202/ 106/ 08312
150+
... 203/100/04333
151+
... 20418290688
152+
... 208/140/04075
153+
... 21/815/08150
154+
... 212/5730/0455
155+
... 2129081508158
156+
... 22/815/08154
157+
... 220/5769/0078
158+
... 2202081508156
159+
... 2324081508151
160+
... 24/815/08151
161+
... 2475081508152
162+
... 249/115/90057
163+
... 249/133/90020
164+
... 26 242 02421
165+
... 2613081508153
166+
... 27 173 00028
167+
... 27/673/50365
168+
... 2722081508154
169+
... 2893081508152
170+
... 29/815/08158
171+
... 3048081508155
172+
... 307/5904/0270
173+
... 3101081508154
174+
... 312/5120/1726
175+
... 313/5753/1315
176+
... 3201012312340
177+
... 332/5751/2 653
178+
... 332/5776/0076
179+
... 339/5822/0944
180+
... 342/5938/0307
181+
... 4079081508151
182+
... 4151081508156
183+
... 5133081508159
184+
... 75 815 08152
185+
... 76 001/12 885
186+
... 9181081508155
187+
... 93815/08152
188+
... 99015/28445
189+
... 99019132055
190+
...
191+
... '''
192+
>>> [x for x in numbers.splitlines() if x and not stnr.is_valid(x)]
193+
[]

0 commit comments

Comments
 (0)
0