sentiment analysis · niralkk/python-docs-samples@e0e6b9d

Commit e0e6b9d
sentiment analysis
1 parent 70a24a0 commit e0e6b9d

7 files changed: +963 -0 lines changed

Lines changed: 286 additions & 0 deletions
#!/usr/bin/env python

# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This application demonstrates how to perform basic operations on datasets
with the Google AutoML Natural Language API.

For more information, see the tutorial page at
https://cloud.google.com/natural-language/automl/docs/
"""

import argparse
import os
from datetime import datetime

def create_dataset(project_id, compute_region, dataset_name, sentiment_max):
    """Create a dataset for sentiment."""
    # [START automl_natural_language_create_dataset]
    # TODO(developer): Uncomment and set the following variables
    # project_id = '[PROJECT_ID]'
    # compute_region = '[COMPUTE_REGION]'
    # dataset_name = '[DATASET_NAME]'
    # sentiment_max = Integer score for sentiment, with a maximum of 10

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # A resource that represents a Google Cloud Platform location.
    project_location = client.location_path(project_id, compute_region)

    # Specify the maximum sentiment score for the dataset.
    dataset_metadata = {"sentiment_max": sentiment_max}

    # Set dataset name and metadata.
    my_dataset = {
        "display_name": dataset_name,
        "text_sentiment_dataset_metadata": dataset_metadata,
    }

    # Create a dataset with the dataset metadata in the region.
    dataset = client.create_dataset(project_location, my_dataset)

    # Display the dataset information.
    print("Dataset name: {}".format(dataset.name))
    print("Dataset id: {}".format(dataset.name.split("/")[-1]))
    print("Dataset display name: {}".format(dataset.display_name))
    print("Text sentiment dataset metadata:")
    print("\t{}".format(dataset.text_sentiment_dataset_metadata))
    print("Dataset example count: {}".format(dataset.example_count))
    print(
        "Dataset create time: {}".format(
            datetime.fromtimestamp(dataset.create_time.seconds).strftime(
                "%Y-%m-%dT%H:%M:%SZ"
            )
        )
    )

    # [END automl_natural_language_create_dataset]

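# Usage sketch for create_dataset (hypothetical values, not part of this
# sample). sentiment_max is the top of the dataset's integer sentiment scale,
# so labels range from 0 to sentiment_max, and the scale maxes out at 10:
#
#   create_dataset("my-project", "us-central1", "my_sentiment_dataset", 4)
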
def list_datasets(project_id, compute_region, filter_):
    """List all datasets."""
    # [START automl_natural_language_list_datasets]
    # TODO(developer): Uncomment and set the following variables
    # project_id = '[PROJECT_ID]'
    # compute_region = '[COMPUTE_REGION]'
    # filter_ = 'filter expression here'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # A resource that represents a Google Cloud Platform location.
    project_location = client.location_path(project_id, compute_region)

    # List all the datasets available in the region by applying the filter.
    response = client.list_datasets(project_location, filter_)

    print("List of datasets:")
    for dataset in response:
        # Display the dataset information.
        print("Dataset name: {}".format(dataset.name))
        print("Dataset id: {}".format(dataset.name.split("/")[-1]))
        print("Dataset display name: {}".format(dataset.display_name))
        print("Text sentiment dataset metadata:")
        print("\t{}".format(dataset.text_sentiment_dataset_metadata))
        print("Dataset example count: {}".format(dataset.example_count))
        print(
            "Dataset create time: {}".format(
                datetime.fromtimestamp(dataset.create_time.seconds).strftime(
                    "%Y-%m-%dT%H:%M:%SZ"
                )
            )
        )

    # [END automl_natural_language_list_datasets]

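# Usage sketch (hypothetical project and region). The default filter used by
# the CLI below, 'text_sentiment_dataset_metadata:*', restricts the listing to
# text sentiment datasets; an empty filter should list datasets of every type:
#
#   list_datasets("my-project", "us-central1",
#                 "text_sentiment_dataset_metadata:*")
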
def get_dataset(project_id, compute_region, dataset_id):
    """Get the dataset."""
    # [START automl_natural_language_get_dataset]
    # TODO(developer): Uncomment and set the following variables
    # project_id = '[PROJECT_ID]'
    # compute_region = '[COMPUTE_REGION]'
    # dataset_id = '[DATASET_ID]'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # Get the full path of the dataset.
    dataset_full_id = client.dataset_path(
        project_id, compute_region, dataset_id
    )

    # Get the complete detail of the dataset.
    dataset = client.get_dataset(dataset_full_id)

    # Display the dataset information.
    print("Dataset name: {}".format(dataset.name))
    print("Dataset id: {}".format(dataset.name.split("/")[-1]))
    print("Dataset display name: {}".format(dataset.display_name))
    print("Text sentiment dataset metadata:")
    print("\t{}".format(dataset.text_sentiment_dataset_metadata))
    print("Dataset example count: {}".format(dataset.example_count))
    print(
        "Dataset create time: {}".format(
            datetime.fromtimestamp(dataset.create_time.seconds).strftime(
                "%Y-%m-%dT%H:%M:%SZ"
            )
        )
    )

    # [END automl_natural_language_get_dataset]

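# Note that dataset_id here is the short identifier, not the full resource
# name; dataset_path() expands it to the form
# projects/<project>/locations/<region>/datasets/<dataset-id>, which is the
# same name this sample shortens with dataset.name.split("/")[-1]. Usage
# sketch with a hypothetical id:
#
#   get_dataset("my-project", "us-central1", "TST1234567890123456789")
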
def import_data(project_id, compute_region, dataset_id, path):
    """Import labelled items."""
    # [START automl_natural_language_import_data]
    # TODO(developer): Uncomment and set the following variables
    # project_id = '[PROJECT_ID]'
    # compute_region = '[COMPUTE_REGION]'
    # dataset_id = '[DATASET_ID]'
    # path = 'gs://path/to/file.csv'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # Get the full path of the dataset.
    dataset_full_id = client.dataset_path(
        project_id, compute_region, dataset_id
    )

    # Get the multiple Google Cloud Storage URIs.
    input_uris = path.split(",")
    input_config = {"gcs_source": {"input_uris": input_uris}}

    # Import data into the dataset from the input URIs.
    response = client.import_data(dataset_full_id, input_config)

    print("Processing import...")
    # Synchronous check of operation status.
    print("Data imported. {}".format(response.result()))

    # [END automl_natural_language_import_data]

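# Because path is split on commas, several CSV files can be imported in a
# single call. Usage sketch (the bucket and dataset id are hypothetical):
#
#   import_data("my-project", "us-central1", "TST1234567890123456789",
#               "gs://my-bucket/train.csv,gs://my-bucket/test.csv")
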
def export_data(project_id, compute_region, dataset_id, output_uri):
    """Export a dataset to a Google Cloud Storage bucket."""
    # [START automl_natural_language_export_data]
    # TODO(developer): Uncomment and set the following variables
    # project_id = '[PROJECT_ID]'
    # compute_region = '[COMPUTE_REGION]'
    # dataset_id = '[DATASET_ID]'
    # output_uri = 'gs://location/to/export/data'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # Get the full path of the dataset.
    dataset_full_id = client.dataset_path(
        project_id, compute_region, dataset_id
    )

    # Set the output URI.
    output_config = {"gcs_destination": {"output_uri_prefix": output_uri}}

    # Export the data to the output URI.
    response = client.export_data(dataset_full_id, output_config)

    print("Processing export...")
    # Synchronous check of operation status.
    print("Data exported. {}".format(response.result()))

    # [END automl_natural_language_export_data]

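# output_uri is passed as gcs_destination.output_uri_prefix, so it acts as a
# folder prefix under which the exported files are written. Usage sketch
# (hypothetical bucket and dataset id):
#
#   export_data("my-project", "us-central1", "TST1234567890123456789",
#               "gs://my-bucket/export/")
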
def delete_dataset(project_id, compute_region, dataset_id):
    """Delete a dataset."""
    # [START automl_natural_language_delete_dataset]
    # TODO(developer): Uncomment and set the following variables
    # project_id = '[PROJECT_ID]'
    # compute_region = '[COMPUTE_REGION]'
    # dataset_id = '[DATASET_ID]'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # Get the full path of the dataset.
    dataset_full_id = client.dataset_path(
        project_id, compute_region, dataset_id
    )

    # Delete the dataset.
    response = client.delete_dataset(dataset_full_id)

    # Synchronous check of operation status.
    print("Dataset deleted. {}".format(response.result()))

    # [END automl_natural_language_delete_dataset]

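# A non-blocking variant of delete_dataset, offered as a sketch. It assumes
# the long-running operation future returned by the client exposes
# add_done_callback(), as google.api_core.operation.Operation does. This
# helper is illustrative only and is not wired into the CLI parser below.
def delete_dataset_async(project_id, compute_region, dataset_id):
    """Delete a dataset without blocking on the long-running operation."""
    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # Get the full path of the dataset.
    dataset_full_id = client.dataset_path(
        project_id, compute_region, dataset_id
    )

    # Start the delete and register a callback instead of blocking on
    # response.result(); the callback receives the completed operation future.
    response = client.delete_dataset(dataset_full_id)
    response.add_done_callback(lambda operation: print("Dataset deleted."))
    return response
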
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    subparsers = parser.add_subparsers(dest="command")

    create_dataset_parser = subparsers.add_parser(
        "create_dataset", help=create_dataset.__doc__
    )
    create_dataset_parser.add_argument("dataset_name")
    create_dataset_parser.add_argument("sentiment_max")

    list_datasets_parser = subparsers.add_parser(
        "list_datasets", help=list_datasets.__doc__
    )
    list_datasets_parser.add_argument(
        "filter_", nargs="?", default="text_sentiment_dataset_metadata:*"
    )

    get_dataset_parser = subparsers.add_parser(
        "get_dataset", help=get_dataset.__doc__
    )
    get_dataset_parser.add_argument("dataset_id")

    import_data_parser = subparsers.add_parser(
        "import_data", help=import_data.__doc__
    )
    import_data_parser.add_argument("dataset_id")
    import_data_parser.add_argument(
        "path", nargs="?", default="gs://cloud-ml-data/NL-entity/dataset.csv"
    )

    export_data_parser = subparsers.add_parser(
        "export_data", help=export_data.__doc__
    )
    export_data_parser.add_argument("dataset_id")
    export_data_parser.add_argument("output_uri")

    delete_dataset_parser = subparsers.add_parser(
        "delete_dataset", help=delete_dataset.__doc__
    )
    delete_dataset_parser.add_argument("dataset_id")

    project_id = os.environ["PROJECT_ID"]
    compute_region = os.environ["REGION_NAME"]

    args = parser.parse_args()

    if args.command == "create_dataset":
        sentiment_max = int(args.sentiment_max)
        create_dataset(
            project_id, compute_region, args.dataset_name, sentiment_max
        )
    if args.command == "list_datasets":
        list_datasets(project_id, compute_region, args.filter_)
    if args.command == "get_dataset":
        get_dataset(project_id, compute_region, args.dataset_id)
    if args.command == "import_data":
        import_data(project_id, compute_region, args.dataset_id, args.path)
    if args.command == "export_data":
        export_data(
            project_id, compute_region, args.dataset_id, args.output_uri
        )
    if args.command == "delete_dataset":
        delete_dataset(project_id, compute_region, args.dataset_id)
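
# Example shell session for the CLI above, assuming the module is saved as
# dataset.py (the filename is not shown in this diff) and using hypothetical
# project, region, and dataset values:
#
#   export PROJECT_ID=my-project
#   export REGION_NAME=us-central1
#   python dataset.py create_dataset my_sentiment_dataset 4
#   python dataset.py list_datasets
#   python dataset.py delete_dataset TST1234567890123456789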
