diff --git a/xrspatial/emerging_hotspots.py b/xrspatial/emerging_hotspots.py
index 02c83552..5f101b2c 100644
--- a/xrspatial/emerging_hotspots.py
+++ b/xrspatial/emerging_hotspots.py
@@ -22,11 +22,14 @@
 import xarray as xr
 from numba import prange
 
-from xrspatial.convolution import convolve_2d, _convolve_2d_numpy, _convolve_2d_cupy
-from xrspatial.focal import _calc_hotspots_numpy
+from xrspatial.convolution import convolve_2d
+from xrspatial.focal import (
+    _calc_hotspots_numpy,
+    _gistar_global_stats,
+    _gistar_zscore,
+)
 from xrspatial.utils import (
     ArrayTypeFunctionMapping,
-    _boundary_to_dask,
     _validate_boundary,
     ngjit,
     not_implemented_func,
@@ -108,6 +111,51 @@ def _check_memory(n_times, ny, nx):
         )
 
 
+# ---------------------------------------------------------------------------
+# Per-time-step Getis-Ord Gi* z-score
+# ---------------------------------------------------------------------------
+
+def _gistar_step_finalize(weighted_sum, weight_sum, sq_weight_sum,
+                          global_mean, global_std, n):
+    """Gi* z-score from convolution terms, NaN where the neighborhood is empty.
+
+    ``_gistar_zscore`` maps an empty neighborhood (``weight_sum == 0``,
+    e.g. a cell whose own value and every neighbor are NaN) to a z-score
+    of 0. Emerging hot spot analysis needs those cells to stay NaN so the
+    Mann-Kendall step can detect pixels that have no data across the whole
+    time series instead of treating them as a constant cold series.
+    """
+    z = _gistar_zscore(weighted_sum, weight_sum, sq_weight_sum,
+                       global_mean, global_std, n)
+    return np.where(weight_sum > 0, z, np.float32(np.nan))
+
+
+def _gistar_step_zscore(step_data, kernel, sq_kernel, global_mean,
+                        global_std, n, convolve):
+    """Getis-Ord Gi* z-score for a single 2-D time step.
+
+    ``convolve`` is the backend convolution (numpy or cupy) applied with
+    edge padding already handled by the caller. The neighborhood weight
+    sum ``W_i`` and squared-weight sum ``W2_i`` come from convolving the
+    validity mask with the kernel and the squared kernel, so raster edges
+    and interior NaN cells drop out of the sums exactly as ``hotspots()``
+    does. ``global_mean``, ``global_std``, and ``n`` are the global terms
+    over the whole space-time cube.
+    """
+    # Single-array backends pass a concrete numpy or cupy slice, so pick the
+    # matching module to build the validity mask. The dask helper below works
+    # off da.* instead, since its slices are lazy dask arrays of either chunk
+    # type and convolve_2d dispatches on the chunk type at compute time.
+    xp = cupy.get_array_module(step_data) if cupy is not None else np
+    valid = (~xp.isnan(step_data)).astype(step_data.dtype)
+    filled = xp.where(valid > 0, step_data, step_data.dtype.type(0.0))
+    weighted_sum = convolve(filled, kernel)
+    weight_sum = convolve(valid, kernel)
+    sq_weight_sum = convolve(valid, sq_kernel)
+    return _gistar_step_finalize(weighted_sum, weight_sum, sq_weight_sum,
+                                 global_mean, global_std, n)
+
+
 # ---------------------------------------------------------------------------
 # Mann-Kendall helpers (Numba-JIT for use inside pixel loops)
 # ---------------------------------------------------------------------------
@@ -395,24 +443,23 @@ def _emerging_hotspots_numpy(raster, kernel, boundary='nan'):
     data = raster.data.astype(np.float32)
     n_times, ny, nx = data.shape
 
-    # Global statistics across entire space-time cube
-    global_mean = np.nanmean(data)
-    global_std = np.nanstd(data)
-    if global_std == 0:
-        raise ZeroDivisionError(
-            "Standard deviation of the input raster values is 0."
-        )
+    # Global Gi* terms across the entire space-time cube.
+    global_mean = np.float32(np.nanmean(data))
+    global_std = np.float32(np.nanstd(data))
+    n = int((~np.isnan(data)).sum())
+    _gistar_global_stats(global_mean, global_std, n)
 
-    norm_kernel = (kernel / kernel.sum()).astype(np.float32)
+    kernel = kernel.astype(np.float32)
+    sq_kernel = kernel * kernel
+    convolve = partial(convolve_2d, boundary=boundary)
 
     # Compute Gi* z-scores and confidence bins per time step
     gi_zscore = np.empty((n_times, ny, nx), dtype=np.float32)
     gi_bin = np.empty((n_times, ny, nx), dtype=np.int8)
 
     for t in range(n_times):
-        step_data = data[t]
-        convolved = convolve_2d(step_data, norm_kernel, boundary)
-        z = (convolved - global_mean) / global_std
+        z = _gistar_step_zscore(data[t], kernel, sq_kernel, global_mean,
+                                global_std, n, convolve)
         gi_zscore[t] = z
         gi_bin[t] = _calc_hotspots_numpy(z)
 
@@ -432,20 +479,20 @@ def _emerging_hotspots_cupy(raster, kernel, boundary='nan'):
     data = raster.data.astype(cupy.float32)
     n_times, ny, nx = data.shape
 
-    global_mean = cupy.nanmean(data)
-    global_std = cupy.nanstd(data)
-    if float(global_std) == 0:
-        raise ZeroDivisionError(
-            "Standard deviation of the input raster values is 0."
-        )
+    global_mean = np.float32(float(cupy.nanmean(data)))
+    global_std = np.float32(float(cupy.nanstd(data)))
+    n = int((~cupy.isnan(data)).sum())
+    _gistar_global_stats(global_mean, global_std, n)
 
-    norm_kernel = (kernel / kernel.sum()).astype(np.float32)
+    kernel = cupy.asarray(kernel).astype(cupy.float32)
+    sq_kernel = kernel * kernel
+    convolve = partial(convolve_2d, boundary=boundary)
 
-    # GPU: convolution + z-score per time step
+    # GPU: per-time-step Gi* z-score
     gi_zscore_gpu = cupy.empty((n_times, ny, nx), dtype=cupy.float32)
     for t in range(n_times):
-        convolved = convolve_2d(data[t], norm_kernel, boundary)
-        gi_zscore_gpu[t] = (convolved - global_mean) / global_std
+        gi_zscore_gpu[t] = _gistar_step_zscore(
+            data[t], kernel, sq_kernel, global_mean, global_std, n, convolve)
 
     # Transfer to CPU for Mann-Kendall (branching-heavy, better on CPU)
     gi_zscore_cpu = cupy.asnumpy(gi_zscore_gpu)
@@ -471,10 +518,23 @@ def _emerging_hotspots_cupy(raster, kernel, boundary='nan'):
 # Dask + NumPy backend
 # ---------------------------------------------------------------------------
 
-def _convolve_and_zscore_chunk(chunk, kernel, global_mean, global_std):
-    """Dask chunk function: convolve -> z-score."""
-    convolved = _convolve_2d_numpy(chunk, kernel)
-    return ((convolved - global_mean) / global_std).astype(np.float32)
+def _gistar_step_dask(step_data, kernel, sq_kernel, global_mean,
+                      global_std, n, boundary):
+    """Lazy per-time-step Gi* z-score for a dask-backed 2-D slice.
+
+    Mirrors ``_hotspots_dask_numpy``: the convolutions go through the
+    dispatching ``convolve_2d`` so the dask boundary handling matches the
+    numpy/cupy single-array paths exactly. NaN cells are excluded from
+    every neighborhood sum via the validity mask.
+    """
+    valid = ~da.isnan(step_data)
+    valid_f = valid.astype(step_data.dtype)
+    filled = da.where(valid, step_data, step_data.dtype.type(0.0))
+    weighted_sum = convolve_2d(filled, kernel, boundary)
+    weight_sum = convolve_2d(valid_f, kernel, boundary)
+    sq_weight_sum = convolve_2d(valid_f, sq_kernel, boundary)
+    return _gistar_step_finalize(weighted_sum, weight_sum, sq_weight_sum,
+                                 global_mean, global_std, n)
 
 
 def _mk_classify_dask_chunk(block):
@@ -520,36 +580,23 @@ def _emerging_hotspots_dask_numpy(raster, kernel, boundary='nan'):
 
     n_times = data.shape[0]
 
-    # Pass 1: eagerly compute global statistics (two scalars)
-    global_mean, global_std = da.compute(da.nanmean(data), da.nanstd(data))
+    # Pass 1: eagerly compute global Gi* terms over the whole cube.
+    global_mean, global_std, n = da.compute(
+        da.nanmean(data), da.nanstd(data), (~da.isnan(data)).sum())
     global_mean = np.float32(global_mean)
     global_std = np.float32(global_std)
+    n = int(n)
+    _gistar_global_stats(global_mean, global_std, n)
 
-    if global_std == 0:
-        raise ZeroDivisionError(
-            "Standard deviation of the input raster values is 0."
-        )
+    kernel = kernel.astype(np.float32)
+    sq_kernel = kernel * kernel
 
-    norm_kernel = (kernel / kernel.sum()).astype(np.float32)
-    pad_h = norm_kernel.shape[0] // 2
-    pad_w = norm_kernel.shape[1] // 2
-
-    # Pass 2: per time step, convolve + z-score via map_overlap, then stack
-    zscore_slices = []
-    for t in range(n_times):
-        _func = partial(
-            _convolve_and_zscore_chunk,
-            kernel=norm_kernel,
-            global_mean=global_mean,
-            global_std=global_std,
-        )
-        z_t = data[t].map_overlap(
-            _func,
-            depth=(pad_h, pad_w),
-            boundary=_boundary_to_dask(boundary),
-            meta=np.array((), dtype=np.float32),
-        )
-        zscore_slices.append(z_t)
+    # Pass 2: per time step, lazy Gi* z-score via convolve_2d, then stack.
+    zscore_slices = [
+        _gistar_step_dask(data[t], kernel, sq_kernel, global_mean,
+                          global_std, n, boundary)
+        for t in range(n_times)
+    ]
 
     gi_zscore = da.stack(zscore_slices, axis=0)
     # Rechunk so time axis is in a single chunk (time dim is small)
@@ -582,12 +629,6 @@ def _emerging_hotspots_dask_numpy(raster, kernel, boundary='nan'):
 # Dask + CuPy backend (GPU convolution, CPU Mann-Kendall)
 # ---------------------------------------------------------------------------
 
-def _convolve_and_zscore_cupy_chunk(chunk, kernel, global_mean, global_std):
-    """Dask chunk function: GPU convolve -> z-score."""
-    convolved = _convolve_2d_cupy(chunk, kernel)
-    return ((convolved - global_mean) / global_std).astype(cupy.float32)
-
-
 def _gi_bin_cupy_chunk(block):
     """Dask chunk function: z-scores -> confidence bins (CPU classify, CuPy I/O)."""
     block_cpu = cupy.asnumpy(block)
@@ -622,36 +663,23 @@ def _emerging_hotspots_dask_cupy(raster, kernel, boundary='nan'):
 
     n_times = data.shape[0]
 
-    # Pass 1: eagerly compute global statistics (two scalars)
-    global_mean, global_std = da.compute(da.nanmean(data), da.nanstd(data))
+    # Pass 1: eagerly compute global Gi* terms over the whole cube.
+    global_mean, global_std, n = da.compute(
+        da.nanmean(data), da.nanstd(data), (~da.isnan(data)).sum())
     global_mean = np.float32(float(global_mean))
     global_std = np.float32(float(global_std))
-
-    if global_std == 0:
-        raise ZeroDivisionError(
-            "Standard deviation of the input raster values is 0."
-        )
-
-    norm_kernel = (kernel / kernel.sum()).astype(np.float32)
-    pad_h = norm_kernel.shape[0] // 2
-    pad_w = norm_kernel.shape[1] // 2
-
-    # Pass 2: per time step, GPU convolve + z-score via map_overlap, then stack
-    _func = partial(
-        _convolve_and_zscore_cupy_chunk,
-        kernel=norm_kernel,
-        global_mean=global_mean,
-        global_std=global_std,
-    )
-    zscore_slices = []
-    for t in range(n_times):
-        z_t = data[t].map_overlap(
-            _func,
-            depth=(pad_h, pad_w),
-            boundary=_boundary_to_dask(boundary, is_cupy=True),
-            meta=cupy.array((), dtype=cupy.float32),
-        )
-        zscore_slices.append(z_t)
+    n = int(n)
+    _gistar_global_stats(global_mean, global_std, n)
+
+    kernel = kernel.astype(np.float32)
+    sq_kernel = kernel * kernel
+
+    # Pass 2: per time step, lazy GPU Gi* z-score via convolve_2d, then stack.
+    zscore_slices = [
+        _gistar_step_dask(data[t], kernel, sq_kernel, global_mean,
+                          global_std, n, boundary)
+        for t in range(n_times)
+    ]
 
     gi_zscore = da.stack(zscore_slices, axis=0)
     gi_zscore = gi_zscore.rechunk({0: n_times})
diff --git a/xrspatial/tests/test_emerging_hotspots.py b/xrspatial/tests/test_emerging_hotspots.py
index e6955a22..61dc2c0d 100644
--- a/xrspatial/tests/test_emerging_hotspots.py
+++ b/xrspatial/tests/test_emerging_hotspots.py
@@ -58,6 +58,100 @@ def test_invalid_boundary(self):
             emerging_hotspots(raster, _kernel_3x3(), boundary='invalid')
 
 
+# ---------------------------------------------------------------------------
+# Per-step Getis-Ord Gi* z-score (issue #2804)
+# ---------------------------------------------------------------------------
+
+def _gistar_reference(step, global_mean, global_std, n, kernel):
+    """Hand-rolled per-step Gi* z-score using the focal Gi* helpers.
+
+    Mirrors what hotspots() computes, but with the global mean/std/n taken
+    over the whole space-time cube (the emerging_hotspots convention).
+    """
+    from xrspatial.focal import _gistar_convolutions_numpy, _gistar_zscore
+
+    ws, wsum, sq = _gistar_convolutions_numpy(step, kernel, 'nan')
+    z = _gistar_zscore(ws, wsum, sq, global_mean, global_std, n)
+    return np.where(wsum > 0, z, np.float32(np.nan))
+
+
+class TestGiStarStep:
+    """The per-time-step z-score must be the real Gi* statistic, not the
+    old local-mean z-score (convolve with a normalized kernel then z-score
+    the local mean against the global mean/std)."""
+
+    def test_binary_kernel_matches_reference(self):
+        rng = np.random.default_rng(2804)
+        data = rng.standard_normal((4, 12, 12)).astype('f4')
+        kernel = np.ones((3, 3), dtype=np.float32)
+        ds = emerging_hotspots(_make_raster(data), kernel)
+
+        gm = np.float32(np.nanmean(data))
+        gs = np.float32(np.nanstd(data))
+        n = int((~np.isnan(data)).sum())
+        for t in range(data.shape[0]):
+            ref = _gistar_reference(data[t], gm, gs, n, kernel)
+            got = ds['gi_zscore'].values[t]
+            np.testing.assert_allclose(got, ref, atol=1e-5, equal_nan=True)
+
+    def test_weighted_kernel_matches_reference(self):
+        # A non-binary kernel makes W_i != W2_i, exercising the squared
+        # weight-sum term the old local-mean formula dropped entirely.
+        rng = np.random.default_rng(28041)
+        data = rng.standard_normal((3, 11, 11)).astype('f4')
+        kernel = np.array(
+            [[0.5, 1.0, 0.5],
+             [1.0, 2.0, 1.0],
+             [0.5, 1.0, 0.5]], dtype=np.float32,
+        )
+        ds = emerging_hotspots(_make_raster(data), kernel)
+
+        gm = np.float32(np.nanmean(data))
+        gs = np.float32(np.nanstd(data))
+        n = int((~np.isnan(data)).sum())
+        for t in range(data.shape[0]):
+            ref = _gistar_reference(data[t], gm, gs, n, kernel)
+            got = ds['gi_zscore'].values[t]
+            np.testing.assert_allclose(got, ref, atol=1e-5, equal_nan=True)
+
+    def test_differs_from_local_mean_zscore(self):
+        # Regression guard: the old code convolved with a normalized kernel
+        # and z-scored the local mean. That formula must NOT reproduce the
+        # current output for a weighted kernel.
+        rng = np.random.default_rng(28042)
+        data = rng.standard_normal((2, 10, 10)).astype('f4')
+        kernel = np.array(
+            [[0.0, 1.0, 0.0],
+             [1.0, 4.0, 1.0],
+             [0.0, 1.0, 0.0]], dtype=np.float32,
+        )
+        ds = emerging_hotspots(_make_raster(data), kernel)
+
+        from xrspatial.convolution import convolve_2d
+        gm = np.nanmean(data)
+        gs = np.nanstd(data)
+        norm_kernel = (kernel / kernel.sum()).astype(np.float32)
+        old = (convolve_2d(data[0], norm_kernel, 'nan') - gm) / gs
+
+        got = ds['gi_zscore'].values[0]
+        interior = (slice(1, -1), slice(1, -1))
+        assert not np.allclose(
+            got[interior], old[interior], atol=1e-3
+        ), "per-step z-score still matches the old local-mean formula"
+
+    def test_interior_nan_excluded_from_neighborhood(self):
+        # An interior NaN must drop out of the Gi* neighborhood sums; the
+        # surrounding finite cells still get finite z-scores.
+        rng = np.random.default_rng(28043)
+        data = rng.standard_normal((2, 9, 9)).astype('f4')
+        data[:, 4, 4] = np.nan
+        ds = emerging_hotspots(_make_raster(data), _kernel_3x3())
+        z0 = ds['gi_zscore'].values[0]
+        # Neighbors of the NaN cell are interior and stay finite.
+        assert np.isfinite(z0[3, 4])
+        assert np.isfinite(z0[5, 4])
+
+
 # ---------------------------------------------------------------------------
 # Normal CDF approximation
 # ---------------------------------------------------------------------------