首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >Pandas MultiIndex DataFrame的多个MultiIndex特性集合

Pandas MultiIndex DataFrame的多个MultiIndex特性集合
EN

Code Review用户
提问于 2022-02-09 13:57:53
回答 1查看 104关注 0票数 1

编辑:最终的目标是监测恶劣天气的运动,确定其矢量并作出预测。

我正在处理来自美国国家强风暴实验室(National Severe Storms Laboratory)的GeoJSON FeatureCollection数据。该数据集可在这里公开获取。

以下是单个JSON文件的一部分。

代码语言:javascript
复制
{"source": "NOAA/NCEP Central Operations",
 "product": "ProbSevere",
 "validTime": "20211011_010058 UTC",
 "productionTime": "20211011_010201 UTC",
 "machine": "vm-cprk-mrms-ops-probsvr1.ncep.noaa.gov",
 "type": "FeatureCollection",
 "features": [
     {"type": "Feature",
      "geometry": {
          "type": "Polygon",
          "coordinates": [[[-95.53, 38.92], [-95.50, 38.92], [-95.47, 38.90], [-95.47, 38.85], [-95.49, 38.83], [-95.49, 38.81], [-95.46, 38.78], [-95.37, 38.75], [-95.36, 38.74], [-95.36, 38.70], [-95.37, 38.69], [-95.39, 38.69], [-95.40, 38.68], [-95.46, 38.68], [-95.47, 38.67], [-95.51, 38.67], [-95.52, 38.66], [-95.56, 38.66], [-95.57, 38.67], [-95.59, 38.67], [-95.60, 38.68], [-95.68, 38.70], [-95.71, 38.73], [-95.71, 38.75], [-95.69, 38.78], [-95.60, 38.79], [-95.58, 38.81], [-95.58, 38.89], [-95.57, 38.89], [-95.53, 38.92]]]
      },
         "models": {
          "probsevere": {
              "PROB": "1",
              "LINE01": "ProbHail: 1%; ProbWind: 1%; ProbTor: 0%",
              "LINE02": "- MESH: 0.07 in.",
              "LINE03": "- VIL Density: 1.26 g/m^3",
              "LINE04": "- Flash Rate: 1 fl/min",
              "LINE05": "- Flash Density (max in last 30 min): 0.07 fl/min/km^2",
              "LINE06": "- Max LLAzShear: 0.003 /s",
              "LINE07": "- 98% LLAzShear: 0.002 /s",
              "LINE08": "- 98% MLAzShear: 0.003 /s",
              "LINE09": "- Norm. vert. growth rate: 2356Z 0.7%/min (weak)",
              "LINE10": "- EBShear: 45.9 kts; SRH 0-1km AGL: 93 m^2/s^2",
              "LINE11": "- MUCAPE: 440 J/kg; MLCAPE: 0 J/kg; MLCIN: 0 J/kg",
              "LINE12": "- MeanWind 1-3kmAGL: 15.6 kts",
              "LINE13": "- Wetbulb 0C hgt: 11.9 kft AGL",
              "LINE14": "- CAPE -10C to -30C: 123 J/kg; PWAT: 1.5 in.",
              "LINE15": "Avg. beam height (ARL): 2.96 kft / 0.90 km"
          },
          "probtor": {
              "PROB": "0",
              "LINE01": "ProbTor: 0%",
              "LINE02": "- Max LLAzShear: 0.003 /s (weak)",
              "LINE03": "- 98% LLAzShear: 0.002 /s (weak)",
              "LINE04": "- 98% MLAzShear: 0.003 /s (weak)",
              "LINE05": "- Flash Density: 0.07 fl/min/km^2",
              "LINE06": "- SRH 0-1km AGL: 93 m2/s2",
              "LINE07": "- EBShear: 45.9 kts",
              "LINE08": "- MeanWind 1-3kmAGL: 15.6 kts",
              "LINE09": "- MLCAPE/MLCIN: 0/0 J/kg",
              "LINE10": "Avg. beam height (ARL): 2.96 kft / 0.90 km"
          },
          "probhail": {
              "PROB": "1",
              "LINE01": "ProbHail: 1%",
              "LINE02": "- MESH: 0.07 in.",
              "LINE03": "- Flash Rate: 1 fl/min",
              "LINE04": "- Norm. vert. growth rate: 2356Z 0.7%/min (weak)",
              "LINE05": "- EBShear: 45.9 kts",
              "LINE06": "- CAPE -10C to -30C: 123 J/kg",
              "LINE07": "- PWAT: 1.5 in.",
              "LINE08": "- Wetbulb 0C hgt: 11.9 kft AGL"
          },
          "probwind": {
              "PROB": "1",
              "LINE01": "ProbWind: 1%",
              "LINE02": "- MESH: 0.07 in.",
              "LINE03": "- VIL Density: 1.26 g/m^3",
              "LINE04": "- Flash Rate: 1 fl/min",
              "LINE05": "- 98% LLAzShear: 0.002 /s (weak)",
              "LINE06": "- 98% MLAzShear: 0.003 /s (weak)",
              "LINE07": "- Norm. vert. growth rate: 2356Z 0.7%/min (weak)",
              "LINE08": "- EBShear: 45.9 kts",
              "LINE09": "- MeanWind 1-3kmAGL: 15.6 kts",
              "LINE10": "- MUCAPE: 440 J/kg; MLCAPE: 0 J/kg"
          }
      },
      "properties": {
          "MUCAPE": "440",
          "MLCAPE": "0",
          "MLCIN": "0",
          "EBSHEAR": "45.9",
          "SRH01KM": "93",
          "MEANWIND_1-3kmAGL": "15.6",
          "MESH": "0.07",
          "VIL_DENSITY": "1.26",
          "FLASH_RATE": "1",
          "FLASH_DENSITY": "0.07",
          "MAXLLAZ": "0.003",
          "P98LLAZ": "0.002",
          "P98MLAZ": "0.003",
          "MAXRC_EMISS": "2356Z 0.7%/min (weak)",
          "MAXRC_ICECF": "2251Z 0.0/min (weak)",
          "WETBULB_0C_HGT": "11.9",
          "PWAT": "1.5",
          "CAPE_M10M30": "123",
          "LJA": "0.0",
          "SIZE": "518",
          "AVG_BEAM_HGT": "2.96 kft / 0.90 km",
          "MOTION_EAST": "8.691",
          "MOTION_SOUTH": "-10.096",
          "PS": "5",
          "ID": "89234"
      }},
     {"type": "Feature",
         "geometry": {
             "type": "Polygon",
             "coordinates": [[[-96.73, 37.22], [-96.71, 37.22], [-96.69, 37.20], [-96.66, 37.19], [-96.63, 37.16], [-96.61, 37.16], [-96.60, 37.15], [-96.60, 37.11], [-96.57, 37.08], [-96.55, 37.08], [-96.54, 37.07], [-96.48, 37.07], [-96.46, 37.04], [-96.46, 37.01], [-96.50, 36.93], [-96.51, 36.93], [-96.54, 36.90], [-96.56, 36.90], [-96.57, 36.88], [-96.58, 36.88], [-96.62, 36.84], [-96.64, 36.84], [-96.65, 36.83], [-96.71, 36.83], [-96.72, 36.84], [-96.74, 36.84], [-96.74, 36.86], [-96.72, 36.88], [-96.72, 36.90], [-96.70, 36.92], [-96.66, 36.92], [-96.64, 36.94], [-96.64, 37.01], [-96.65, 37.02], [-96.65, 37.06], [-96.66, 37.07], [-96.68, 37.07], [-96.71, 37.03], [-96.75, 37.03], [-96.77, 37.05], [-96.77, 37.08], [-96.76, 37.09], [-96.76, 37.12], [-96.75, 37.13], [-96.75, 37.20], [-96.73, 37.22]]]
         },
      "models": {
             "probsevere": {
                 "PROB": "3",
                 "LINE01": "ProbHail: 1%; ProbWind: 3%; ProbTor: 0%",
                 "LINE02": "- MESH: 0.35 in.",
                 "LINE03": "- VIL Density: 1.96 g/m^3",
                 "LINE04": "- Flash Rate: 16 fl/min",
                 "LINE05": "- Flash Density (max in last 30 min): 0.51 fl/min/km^2",
                 "LINE06": "- Max LLAzShear: 0.003 /s",
                 "LINE07": "- 98% LLAzShear: 0.002 /s",
                 "LINE08": "- 98% MLAzShear: 0.003 /s",
                 "LINE09": "- Norm. vert. growth rate: N/A",
                 "LINE10": "- EBShear: 51.9 kts; SRH 0-1km AGL: 173 m^2/s^2",
                 "LINE11": "- MUCAPE: 1574 J/kg; MLCAPE: 1103 J/kg; MLCIN: -19 J/kg",
                 "LINE12": "- MeanWind 1-3kmAGL: 19.6 kts",
                 "LINE13": "- Wetbulb 0C hgt: 11.8 kft AGL",
                 "LINE14": "- CAPE -10C to -30C: 353 J/kg; PWAT: 1.8 in.",
                 "LINE15": "Avg. beam height (ARL): 5.04 kft / 1.54 km"
             },
             "probtor": {
                 "PROB": "0",
                 "LINE01": "ProbTor: 0%",
                 "LINE02": "- Max LLAzShear: 0.003 /s (weak)",
                 "LINE03": "- 98% LLAzShear: 0.002 /s (weak)",
                 "LINE04": "- 98% MLAzShear: 0.003 /s (weak)",
                 "LINE05": "- Flash Density: 0.51 fl/min/km^2",
                 "LINE06": "- SRH 0-1km AGL: 173 m2/s2",
                 "LINE07": "- EBShear: 51.9 kts",
                 "LINE08": "- MeanWind 1-3kmAGL: 19.6 kts",
                 "LINE09": "- MLCAPE/MLCIN: 1103/-19 J/kg",
                 "LINE10": "Avg. beam height (ARL): 5.04 kft / 1.54 km"
             },
             "probhail": {
                 "PROB": "1",
                 "LINE01": "ProbHail: 1%",
                 "LINE02": "- MESH: 0.35 in.",
                 "LINE03": "- Flash Rate: 16 fl/min",
                 "LINE04": "- Norm. vert. growth rate: N/A",
                 "LINE05": "- EBShear: 51.9 kts",
                 "LINE06": "- CAPE -10C to -30C: 353 J/kg",
                 "LINE07": "- PWAT: 1.8 in.",
                 "LINE08": "- Wetbulb 0C hgt: 11.8 kft AGL"
             },
             "probwind": {
                 "PROB": "3",
                 "LINE01": "ProbWind: 3%",
                 "LINE02": "- MESH: 0.35 in.",
                 "LINE03": "- VIL Density: 1.96 g/m^3",
                 "LINE04": "- Flash Rate: 16 fl/min",
                 "LINE05": "- 98% LLAzShear: 0.002 /s (weak)",
                 "LINE06": "- 98% MLAzShear: 0.003 /s (weak)",
                 "LINE07": "- Norm. vert. growth rate: N/A",
                 "LINE08": "- EBShear: 51.9 kts",
                 "LINE09": "- MeanWind 1-3kmAGL: 19.6 kts",
                 "LINE10": "- MUCAPE: 1574 J/kg; MLCAPE: 1103 J/kg"
             }
         }
      }
 ]}

目前我最感兴趣的是以下字段:

代码语言:javascript
复制
collection['validTime']
feat['geometry']['coordinates']
feat['properties']['ID']
feat['models']['probsevere'&'probtor'&'probwind'&'probhail']['PROB']

这是我目前写出的代码:

代码语言:javascript
复制
# Shorthand used to build MultiIndex keys for `.loc[...]` lookups on the result.
IDX = pd.IndexSlice


def load_samples(path: str) -> pd.DataFrame:
    """Read one ProbSevere FeatureCollection file into a wide DataFrame.

    Args:
        path: Location of a JSON file with a top-level 'validTime' string
            and a 'features' list.

    Returns:
        A DataFrame whose rows are indexed by ('ID', 'parameter') -- one row
        per model probability plus one 'coordinates' row for every feature --
        and whose single column is the file's parsed 'validTime'.
    """
    with open(path) as stream:
        payload = json.load(stream)
    storm_features = payload.pop('features')

    def make_parameters(feature: Dict[str, Any]) -> pd.Series:
        # One column per model; the 'PROB' row holds each model's probability.
        model_table = pd.DataFrame(feature['models'])
        row = model_table.loc['PROB'].astype(int)
        # Keep the polygon next to the probabilities under its own label.
        row['coordinates'] = feature['geometry']['coordinates']
        return row

    # Map each feature ID to its parameter series.
    per_storm = {}
    for feature in storm_features:
        storm_id = feature['properties']['ID']
        per_storm[storm_id] = make_parameters(feature)
    frame = pd.DataFrame.from_dict(per_storm, orient='index')

    # Every feature in a file shares the same valid time.
    frame['validTime'] = pd.to_datetime(
        payload['validTime'], format="%Y%m%d_%H%M%S UTC")

    # (ID, validTime) index -> long form -> validTime moved out to the columns.
    indexed = frame.set_index(['validTime'], append=True, drop=True)
    long_form = indexed.stack()
    wide = long_form.unstack(1)
    return wide.rename_axis(['ID', 'parameter'])


if __name__ == "__main__":
    # Load every sample file into its own frame and place the frames side by
    # side, so each file contributes the column for its validTime.
    paths = glob(os.path.join('sample_data/', '*.json'))
    frames = [load_samples(path) for path in paths]
    samps = pd.concat(frames, axis=1)

    # glob does not guarantee an order, so sort the timestamp columns.
    ordered_columns = sorted(samps.columns)
    samps = samps.reindex(ordered_columns, axis=1)

    print(samps)

输出:

代码语言:javascript
复制
validTime                                        2021-10-11 00:00:53                                2021-10-11 00:02:40  ...                                2021-10-11 00:58:56                                2021-10-11 01:00:58
ID    parameter                                                                                                          ...                                                                                                      
89234 coordinates  [[[-96.06, 38.4], [-95.99, 38.4], [-95.97, 38....  [[[-96.03, 38.41], [-95.98, 38.41], [-95.96, 3...  ...  [[[-95.55, 38.91], [-95.51, 38.91], [-95.48, 3...  [[[-95.53, 38.92], [-95.5, 38.92], [-95.47, 38...
      probhail                                                     1                                                  1  ...                                                  1                                                  1
      probsevere                                                   1                                                  1  ...                                                  1                                                  1
      probtor                                                      0                                                  0  ...                                                  0                                                  0
      probwind                                                     1                                                  1  ...                                                  1                                                  1
...                                                              ...                                                ...  ...                                                ...                                                ...
90393 coordinates                                                NaN                                                NaN  ...                                                NaN  [[[-99.35, 31.38], [-99.31, 31.38], [-99.3, 31...
      probhail                                                   NaN                                                NaN  ...                                                NaN                                                  8
      probsevere                                                 NaN                                                NaN  ...                                                NaN                                                 10
      probtor                                                    NaN                                                NaN  ...                                                NaN                                                  1
      probwind                                                   NaN                                                NaN  ...                                                NaN                                                 10

[1880 rows x 31 columns]

用法:

代码语言:javascript
复制
print(samps.loc[IDX['89234', 'coordinates'], :])

输出:

代码语言:javascript
复制
validTime
2021-10-11 00:00:53    [[[-96.06, 38.4], [-95.99, 38.4], [-95.97, 38....
2021-10-11 00:02:40    [[[-96.03, 38.41], [-95.98, 38.41], [-95.96, 3...
2021-10-11 00:04:47    [[[-96.0, 38.43], [-95.94, 38.43], [-95.93, 38...
2021-10-11 00:06:55    [[[-95.96, 38.45], [-95.92, 38.43], [-95.9, 38...
2021-10-11 00:08:38    [[[-95.98, 38.46], [-95.93, 38.46], [-95.9, 38...
2021-10-11 00:10:59    [[[-95.95, 38.48], [-95.91, 38.48], [-95.9, 38...
2021-10-11 00:12:40    [[[-95.96, 38.48], [-95.91, 38.48], [-95.88, 3...
2021-10-11 00:14:44    [[[-95.93, 38.5], [-95.88, 38.5], [-95.86, 38....
2021-10-11 00:16:50    [[[-95.89, 38.53], [-95.86, 38.52], [-95.82, 3...
2021-10-11 00:18:39    [[[-95.89, 38.53], [-95.87, 38.53], [-95.83, 3...
2021-10-11 00:20:48    [[[-95.87, 38.55], [-95.85, 38.55], [-95.81, 3...
2021-10-11 00:22:57    [[[-95.86, 38.56], [-95.82, 38.56], [-95.79, 3...
2021-10-11 00:24:40    [[[-95.85, 38.58], [-95.8, 38.58], [-95.77, 38...
2021-10-11 00:26:58    [[[-95.84, 38.59], [-95.78, 38.59], [-95.75, 3...
2021-10-11 00:28:52    [[[-95.81, 38.61], [-95.77, 38.61], [-95.76, 3...
2021-10-11 00:30:43    [[[-95.81, 38.63], [-95.76, 38.62], [-95.64, 3...
2021-10-11 00:32:56    [[[-95.81, 38.64], [-95.77, 38.64], [-95.75, 3...
2021-10-11 00:34:59    [[[-95.81, 38.64], [-95.75, 38.64], [-95.7, 38...
2021-10-11 00:36:41    [[[-95.77, 38.67], [-95.73, 38.66], [-95.68, 3...
2021-10-11 00:38:44    [[[-95.78, 38.67], [-95.74, 38.67], [-95.67, 3...
2021-10-11 00:40:52    [[[-95.77, 38.67], [-95.73, 38.67], [-95.72, 3...
2021-10-11 00:42:42    [[[-95.77, 38.67], [-95.71, 38.67], [-95.7, 38...
2021-10-11 00:44:53    [[[-95.7, 38.68], [-95.67, 38.68], [-95.66, 38...
2021-10-11 00:46:51    [[[-95.68, 38.7], [-95.63, 38.7], [-95.62, 38....
2021-10-11 00:48:54    [[[-95.68, 38.71], [-95.58, 38.71], [-95.54, 3...
2021-10-11 00:50:59    [[[-95.59, 38.84], [-95.54, 38.84], [-95.53, 3...
2021-10-11 00:52:59    [[[-95.57, 38.87], [-95.54, 38.87], [-95.51, 3...
2021-10-11 00:54:52    [[[-95.56, 38.89], [-95.51, 38.89], [-95.5, 38...
2021-10-11 00:56:52    [[[-95.56, 38.89], [-95.5, 38.89], [-95.49, 38...
2021-10-11 00:58:56    [[[-95.55, 38.91], [-95.51, 38.91], [-95.48, 3...
2021-10-11 01:00:58    [[[-95.53, 38.92], [-95.5, 38.92], [-95.47, 38...
Name: (89234, coordinates), dtype: object
EN

回答 1

Code Review用户

发布于 2022-02-09 18:44:38

您的IDX似乎未使用,所以您可以删除它。

您在添加初始类型提示方面做得很好。

您的数据存在规范化(normalization)问题:坐标与模型概率之间存在明显的基数不匹配(每个ID只有一组概率,却有许多坐标点)。最明智的解决办法是返回两个使用相同索引的独立DataFrame,一个专门存放概率,另一个存放坐标。坐标DataFrame会把嵌套列表展开,只保留简单的x、y两列。

我不赞成使用堆叠(stack)。概率的名称不适合作为索引层级,直接作为列即可。

您的日期时间格式不应硬编码UTC,而应使用%Z。

建议

代码语言:javascript
复制
import json
from typing import Any, Iterator

import pandas as pd


def load_samples(path: str) -> tuple[
    pd.DataFrame,  # probability dataframe
    pd.DataFrame,  # coordinate dataframe
]:
    """Load one ProbSevere GeoJSON file into probability and coordinate frames.

    Args:
        path: Location of a JSON file holding a top-level 'validTime' string
            (e.g. '20211011_010058 UTC') and a 'features' list.

    Returns:
        A ``(prob_df, coord_df)`` pair, both indexed by ``('ID', 'validTime')``:
        ``prob_df`` has one row per feature and one integer column per model;
        ``coord_df`` has one row per polygon vertex with 'x' and 'y' columns.
        A file without features yields two empty frames carrying that same
        index (``coord_df`` keeps its 'x'/'y' columns).

    Raises:
        KeyError: If an expected key ('features', 'validTime', 'models',
            'geometry', 'properties', ...) is missing.
        ValueError: If a polygon does not consist of exactly one ring, a
            vertex is not an (x, y) pair, or a 'PROB' value is not an integer.
    """
    # JSON text is UTF-8 by specification; do not rely on the locale default.
    with open(path, encoding='utf-8') as file:
        collection = json.load(file)
    features: list[dict[str, Any]] = collection['features']

    def make_probs() -> Iterator[dict[str, Any]]:
        for feat in features:
            probs = {
                model_name: int(model['PROB'])
                for model_name, model in feat['models'].items()
            }
            yield {
                'ID': feat['properties']['ID'],
                **probs,
            }

    def make_coords() -> Iterator[dict[str, Any]]:
        for feat in features:
            # Single-element unpacking: only the outer ring is expected.
            coords, = feat['geometry']['coordinates']
            for x, y in coords:
                yield {
                    'ID': feat['properties']['ID'],
                    'x': x, 'y': y,
                }

    def to_frame(
        records: list[dict[str, Any]], columns: list[str],
    ) -> pd.DataFrame:
        if records:
            return pd.DataFrame.from_records(records, index='ID')
        # With no records there is no 'ID' column for from_records to promote,
        # so build the empty, ID-indexed frame explicitly.
        return pd.DataFrame(
            index=pd.Index([], dtype=object, name='ID'), columns=columns,
        )

    prob_df = to_frame(list(make_probs()), [])
    coord_df = to_frame(list(make_coords()), ['x', 'y'])

    # %Z parses the trailing zone name, so the timestamps are timezone-aware.
    dt = pd.Series(
        name='validTime',
        data=pd.to_datetime(collection['validTime'], format='%Y%m%d_%H%M%S %Z'),
    )

    # Every row of a file shares its valid time; append it as a second level.
    prob_df = prob_df.set_index(dt.repeat(len(prob_df)), append=True)
    coord_df = coord_df.set_index(dt.repeat(len(coord_df)), append=True)

    return prob_df, coord_df


def main(path: str = 'MRMS_PROBSEVERE_20220209_164430.json') -> None:
    """Load one sample file and print its probability and coordinate frames.

    Args:
        path: JSON file to load. Defaults to the sample file name that was
            previously hard-coded, so calling ``main()`` behaves as before.
    """
    prob_df, coord_df = load_samples(path)

    print(prob_df)
    print(coord_df)


# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

输出

代码语言:javascript
复制
                                 probsevere  probtor  probhail  probwind
ID    validTime                                                         
57216 2022-02-09 16:44:30+00:00           2        0         2         1
57240 2022-02-09 16:44:30+00:00           1        0         0         1
57247 2022-02-09 16:44:30+00:00           2        0         0         2
57255 2022-02-09 16:44:30+00:00           1        0         0         1
57256 2022-02-09 16:44:30+00:00           1        0         0         1
57258 2022-02-09 16:44:30+00:00           1        0         0         1
                                     x      y
ID    validTime                              
57216 2022-02-09 16:44:30+00:00 -79.38  26.36
      2022-02-09 16:44:30+00:00 -79.34  26.34
      2022-02-09 16:44:30+00:00 -79.33  26.32
      2022-02-09 16:44:30+00:00 -79.34  26.29
      2022-02-09 16:44:30+00:00 -79.36  26.28
...                                ...    ...
57258 2022-02-09 16:44:30+00:00 -86.20  30.33
      2022-02-09 16:44:30+00:00 -86.11  30.33
      2022-02-09 16:44:30+00:00 -86.11  30.25
      2022-02-09 16:44:30+00:00 -86.20  30.25
      2022-02-09 16:44:30+00:00 -86.20  30.33

[94 rows x 2 columns]
票数 1
EN
页面原文内容由Code Review提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://codereview.stackexchange.com/questions/273912

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档