fixed jvm error by bradmiro · Pull Request #8360 · GoogleCloudPlatform/python-docs-samples · GitHub
[go: up one dir, main page]

Skip to content

fixed jvm error #8360

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 27, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 33 additions & 32 deletions composer/2022_airflow_summit/data_analytics_process_expansion.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,38 +23,6 @@
from pyspark.sql import SparkSession
import pyspark.sql.functions as f

# Inverse Distance Weighting (IDW) algorithm
@f.pandas_udf("YEAR integer, VALUE double", f.PandasUDFType.GROUPED_MAP)
def phx_dw_compute(year, df) -> pd.DataFrame:
    """Estimate Phoenix's annual rainfall / snowfall via Inverse Distance Weighting.

    Each weather station's measurement is weighted by the inverse of its
    squared distance to Phoenix, so the closer a station is to Phoenix, the
    more its measurement counts.

    The distance equation is:

        d = sqrt((x1 - x2)^2 + (y1 - y2)^2)

    and the inverse distance factor is:

        idf = 1 / d^2

    The square cancels the square root, so both are combined into:

        idf = 1 / ((x1 - x2)^2 + (y1 - y2)^2)

    Note that latitude / longitude differences are used directly as planar
    coordinates; no great-circle correction is applied.

    Args:
        year: Grouping key handed over by Spark for this group. Presumably a
            one-element tuple holding the year -- the groupBy call is not
            visible from this function, so confirm against the caller.
        df: pandas DataFrame with the group's rows. The columns LATITUDE,
            LONGITUDE and ANNUAL_AMOUNT are read.

    Returns:
        A pandas DataFrame with the columns YEAR and VALUE, matching the
        schema declared in the ``pandas_udf`` decorator.
    """
    # Latitude and longitude of Phoenix
    PHX_LATITUDE = 33.4484
    PHX_LONGITUDE = -112.0740

    squared_distances = (
        (PHX_LATITUDE - df.LATITUDE) ** 2 + (PHX_LONGITUDE - df.LONGITUDE) ** 2
    )

    # A station located exactly at the reference point has a squared distance
    # of 0. Dividing by it yields an infinite factor, which turns that
    # station's weight into NaN (inf / inf) and every other weight into 0, so
    # the weighted sum would silently collapse to 0.0. IDW is an exact
    # interpolator: at a sample point the estimate is the sample itself, so
    # use the measurement(s) taken at that point directly.
    at_phoenix = squared_distances == 0
    if at_phoenix.any():
        value = df.ANNUAL_AMOUNT[at_phoenix].mean()
    else:
        inverse_distance_factors = 1.0 / squared_distances

        # Calculate each station's weight (the weights sum to 1)
        weights = inverse_distance_factors / inverse_distance_factors.sum()
        value = (weights * df.ANNUAL_AMOUNT).sum()

    return pd.DataFrame({"YEAR": year, "VALUE": value})


if __name__ == "__main__":
# read in the input argument
Expand Down Expand Up @@ -133,6 +101,39 @@ def phx_dw_compute(year, df) -> pd.DataFrame:
states_near_phx = ["AZ", "CA", "CO", "NM", "NV", "UT"]
annual_df = df.where(df.STATE.isin(states_near_phx))

# Inverse Distance Weighting (IDW) algorithm
@f.pandas_udf("YEAR integer, VALUE double", f.PandasUDFType.GROUPED_MAP)
def phx_dw_compute(year, df) -> pd.DataFrame:
    """Estimate Phoenix's annual rainfall / snowfall via Inverse Distance Weighting.

    Each weather station's measurement is weighted by the inverse of its
    squared distance to Phoenix, so the closer a station is to Phoenix, the
    more its measurement counts.

    The distance equation is:

        d = sqrt((x1 - x2)^2 + (y1 - y2)^2)

    and the inverse distance factor is:

        idf = 1 / d^2

    The square cancels the square root, so both are combined into:

        idf = 1 / ((x1 - x2)^2 + (y1 - y2)^2)

    Note that latitude / longitude differences are used directly as planar
    coordinates; no great-circle correction is applied.

    Args:
        year: Grouping key handed over by Spark for this group. Presumably a
            one-element tuple holding the year -- the groupBy call is not
            visible from this function, so confirm against the caller.
        df: pandas DataFrame with the group's rows. The columns LATITUDE,
            LONGITUDE and ANNUAL_AMOUNT are read.

    Returns:
        A pandas DataFrame with the columns YEAR and VALUE, matching the
        schema declared in the ``pandas_udf`` decorator.
    """
    # Latitude and longitude of Phoenix
    PHX_LATITUDE = 33.4484
    PHX_LONGITUDE = -112.0740

    squared_distances = (
        (PHX_LATITUDE - df.LATITUDE) ** 2 +
        (PHX_LONGITUDE - df.LONGITUDE) ** 2
    )

    # A station located exactly at the reference point has a squared distance
    # of 0. Dividing by it yields an infinite factor, which turns that
    # station's weight into NaN (inf / inf) and every other weight into 0, so
    # the weighted sum would silently collapse to 0.0. IDW is an exact
    # interpolator: at a sample point the estimate is the sample itself, so
    # use the measurement(s) taken at that point directly.
    at_phoenix = squared_distances == 0
    if at_phoenix.any():
        value = df.ANNUAL_AMOUNT[at_phoenix].mean()
    else:
        inverse_distance_factors = 1.0 / squared_distances

        # Calculate each station's weight (the weights sum to 1)
        weights = inverse_distance_factors / inverse_distance_factors.sum()
        value = (weights * df.ANNUAL_AMOUNT).sum()

    return pd.DataFrame({"YEAR": year, "VALUE": value})

# Calculate the distance-weighted precipitation amount
phx_annual_prcp_df = (
annual_df.where((annual_df.ELEMENT == "PRCP"))
Expand Down
0