Add text classification samples · mmmarklu/python-docs-samples@b7d5691 · GitHub
[go: up one dir, main page]

Skip to content

Commit b7d5691

Browse files
committed
Add text classification samples
1 parent 77fc490 commit b7d5691

6 files changed

+288
-0
lines changed
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2019 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
18+
def create_dataset(project_id, display_name):
    """Create a text classification dataset and print its name and id.

    Args:
        project_id: Project id used to build the location path.
        display_name: Display name set on the new dataset.
    """
    # [START automl_language_text_classification_create_dataset]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = 'YOUR_PROJECT_ID'
    # display_name = 'YOUR_DATASET_NAME'

    automl_client = automl.AutoMlClient()

    # A resource that represents Google Cloud Platform location.
    parent = automl_client.location_path(project_id, 'us-central1')

    # Specify the classification type
    # Types:
    #   MultiLabel: Multiple labels are allowed for one example.
    #   MultiClass: At most one label is allowed per example.
    classification_type = automl.enums.ClassificationType.MULTICLASS
    dataset_spec = automl.types.Dataset(
        display_name=display_name,
        text_classification_dataset_metadata=(
            automl.types.TextClassificationDatasetMetadata(
                classification_type=classification_type)))

    # Create a dataset with the dataset metadata in the region, then take
    # the result of the returned operation.
    operation = automl_client.create_dataset(parent, dataset_spec)
    new_dataset = operation.result()

    # Display the dataset information; the id is the last segment of the
    # '/'-separated resource name.
    resource_name = new_dataset.name
    print(u'Dataset name: {}'.format(resource_name))
    print(u'Dataset id: {}'.format(resource_name.split('/')[-1]))
    # [END automl_language_text_classification_create_dataset]
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2019 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
18+
def create_model(project_id, dataset_id, display_name):
    """Request a text classification model and print the operation name.

    The returned operation is not waited on here; only its name is printed.

    Args:
        project_id: Project id used to build the location path.
        dataset_id: Id of the dataset set on the model.
        display_name: Display name set on the new model.
    """
    # [START automl_language_text_classification_create_model]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = 'YOUR_PROJECT_ID'
    # dataset_id = 'YOUR_DATASET_ID'
    # display_name = 'YOUR_MODEL_NAME'

    automl_client = automl.AutoMlClient()

    # A resource that represents Google Cloud Platform location.
    parent = automl_client.location_path(project_id, 'us-central1')

    # Leave model unset to use the default base model provided by Google
    model_spec = automl.types.Model(
        display_name=display_name,
        dataset_id=dataset_id,
        text_classification_model_metadata=(
            automl.types.TextClassificationModelMetadata()))

    # Create a model with the model metadata in the region.
    training_operation = automl_client.create_model(parent, model_spec)

    operation_name = training_operation.operation.name
    print(u'Training operation name: {}'.format(operation_name))
    print('Training started...')
    # [END automl_language_text_classification_create_model]
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2019 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
import datetime
18+
import os
19+
20+
import pytest
21+
22+
import language_text_classification_create_dataset
23+
import import_dataset
24+
import delete_dataset
25+
import list_datasets
26+
import get_dataset
27+
import export_dataset
28+
29+
# Project under test; raises KeyError at import time if the variable is unset.
PROJECT_ID = os.environ['GCLOUD_PROJECT']
# Bucket name derived from the project id.
BUCKET_ID = PROJECT_ID + '-lcm'
# Dataset id used by the list/get/export tests below.
DATASET_ID = 'TCN2551826603472450019'
32+
33+
34+
@pytest.mark.slow
def test_create_import_delete_dataset(capsys):
    """Create a dataset, import data into it, then delete it.

    The delete step runs in a ``finally`` clause so that a failing import
    (exception or failed assertion) does not leave the freshly created
    dataset behind in the project.
    """
    # create dataset
    dataset_name = 'test_' + datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    language_text_classification_create_dataset.create_dataset(
        PROJECT_ID, dataset_name)
    out, _ = capsys.readouterr()
    id_label = 'Dataset id: '
    assert id_label in out

    # Find the id by its label instead of by output line position, so an
    # unexpected extra line on stdout does not shift the lookup.
    dataset_id = out.split(id_label, 1)[1].split()[0]

    try:
        # import data
        data = 'gs://{}/happiness.csv'.format(BUCKET_ID)
        import_dataset.import_dataset(PROJECT_ID, dataset_id, data)
        out, _ = capsys.readouterr()
        assert 'Data imported.' in out
    finally:
        # delete dataset (always, even when the import step failed)
        delete_dataset.delete_dataset(PROJECT_ID, dataset_id)
        out, _ = capsys.readouterr()

    assert 'Dataset deleted.' in out
54+
55+
56+
def test_list_dataset(capsys):
    """Check that listing datasets prints the id held in DATASET_ID."""
    expected_line = 'Dataset id: {}'.format(DATASET_ID)

    list_datasets.list_datasets(PROJECT_ID)

    stdout, _ = capsys.readouterr()
    assert expected_line in stdout
61+
62+
63+
def test_get_dataset(capsys):
    """Check that fetching DATASET_ID prints a 'Dataset name: ' line."""
    get_dataset.get_dataset(PROJECT_ID, DATASET_ID)

    stdout, _ = capsys.readouterr()
    assert 'Dataset name: ' in stdout
67+
68+
69+
def test_export_dataset(capsys):
    """Export DATASET_ID to the test bucket and remove the exported blobs.

    Cleanup runs in a ``finally`` clause so blobs written by the export are
    removed even when the output assertion fails.
    """
    from google.cloud import storage

    export_prefix = 'TEST_EXPORT_OUTPUT'
    try:
        export_dataset.export_dataset(
            PROJECT_ID,
            DATASET_ID,
            'gs://{}/{}/'.format(BUCKET_ID, export_prefix))

        out, _ = capsys.readouterr()
        assert 'Dataset exported' in out
    finally:
        # One listing is enough: looping over an empty listing is a no-op,
        # so no separate emptiness check (and second list call) is needed.
        bucket = storage.Client().get_bucket(BUCKET_ID)
        for blob in bucket.list_blobs(prefix=export_prefix):
            blob.delete()
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2018 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
import os
18+
19+
import pytest
20+
21+
import deploy_model
22+
import undeploy_model
23+
24+
# Project under test; raises KeyError at import time if the variable is unset.
PROJECT_ID = os.environ['GCLOUD_PROJECT']
# Id of the model that the deploy/undeploy test operates on.
MODEL_ID = 'TCN3472481026502981088'
26+
27+
28+
@pytest.mark.slow
def test_deploy_undeploy_model(capsys):
    """Undeploy the model, then deploy it again, checking each output."""
    # Order matters: undeploy first, then deploy.
    steps = (
        (undeploy_model.undeploy_model, 'Model undeployment finished.'),
        (deploy_model.deploy_model, 'Model deployment finished.'),
    )
    for run_step, expected_message in steps:
        run_step(PROJECT_ID, MODEL_ID)
        stdout, _ = capsys.readouterr()
        assert expected_message in stdout
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2019 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
18+
def predict(project_id, model_id, content):
    """Send text to a model and print each predicted class and its score.

    Args:
        project_id: Project id used to build the model path.
        model_id: Id of the model used to build the model path.
        content: Text placed in the snippet sent for prediction.
    """
    # [START automl_language_text_classification_predict]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = 'YOUR_PROJECT_ID'
    # model_id = 'YOUR_MODEL_ID'
    # content = 'text to predict'

    client = automl.PredictionServiceClient()

    # Get the full path of the model.
    model_name = client.model_path(project_id, 'us-central1', model_id)

    # mime_type types: 'text/plain', 'text/html'
    snippet = automl.types.TextSnippet(
        content=content, mime_type='text/plain')
    request_payload = automl.types.ExamplePayload(text_snippet=snippet)

    prediction = client.predict(model_name, request_payload)

    for result in prediction.payload:
        name_line = u'Predicted class name: {}'.format(result.display_name)
        print(name_line)
        score_line = u'Predicted class score: {}'.format(
            result.classification.score)
        print(score_line)
    # [END automl_language_text_classification_predict]
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2018 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
import os
18+
19+
import language_text_classification_predict
20+
21+
# Project under test; raises KeyError at import time if the variable is unset.
PROJECT_ID = os.environ['GCLOUD_PROJECT']
# Id of the model that the prediction test queries.
MODEL_ID = 'TCN3472481026502981088'
23+
24+
25+
def test_predict(capsys):
    """Check that predicting on sample text prints a predicted class name."""
    sample_text = 'Fruit and nut flavour'

    language_text_classification_predict.predict(
        PROJECT_ID, MODEL_ID, sample_text)

    stdout, _ = capsys.readouterr()
    assert 'Predicted class name: ' in stdout

0 commit comments

Comments
 (0)
0