Quick Start

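DataFrame

Read a CSV file into a data frame, add a computed column, then chain filter and sort calls. In the example output, sort(price=1) orders the rows by ascending price.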

>>> import dataiter as di
>>> data = di.read_csv("data/listings.csv")
>>> data.price_per_guest = data.price / data.guests
>>> data.head()
.
     id      hood zipcode guests    sqft price price_per_guest
  int64      <U13    <U11  int64 float64 int64         float64
  ───── ───────── ─────── ────── ─────── ───── ───────────────
0  2060 Manhattan   10040      2     nan   100          50.000
1  2595 Manhattan   10018      2     nan   225         112.500
2  3831  Brooklyn   11238      3     500    89          29.667
3  5099 Manhattan   10016      2     nan   200         100.000
4  5121  Brooklyn   11216      2     nan    60          30.000
5  5136  Brooklyn   11232      4     nan   253          63.250
6  5178 Manhattan   10019      2     nan    79          39.500
7  5203 Manhattan   10025      1     nan    79          79.000
8  5238 Manhattan   10002      2     nan   150          75.000
9  5441 Manhattan   10036      2     nan    99          49.500
.
>>> data.filter(hood="Manhattan").filter(guests=2).sort(price=1).head()
.
        id      hood zipcode guests    sqft price price_per_guest
     int64      <U13    <U11  int64 float64 int64         float64
  ──────── ───────── ─────── ────── ─────── ───── ───────────────
0 42279170 Manhattan   10013      2     nan     0             0.0
1 42384530 Manhattan   10036      2     nan     0             0.0
2 18835820 Manhattan   10021      2     nan    10             5.0
3 20171179 Manhattan   10027      2     nan    10             5.0
4 14858544 Manhattan     nan      2     nan    15             7.5
5 31397084 Manhattan   10002      2     nan    19             9.5
6 22289683 Manhattan   10031      2     nan    20            10.0
7  7760204 Manhattan   10040      2     nan    22            11.0
8 43292527 Manhattan   10033      2     nan    22            11.0
9 43268040 Manhattan   10033      2     nan    23            11.5
.

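GeoJSON

Read a GeoJSON file. The result prints as a table in which an object-typed "geometry" column sits alongside the other columns.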

>>> import dataiter as di
>>> data = di.read_geojson("data/neighbourhoods.geojson")
>>> data.head()
.
     neighbourhood neighbourhood_group       geometry
              <U26                <U13         object
  ──────────────── ─────────────────── ──────────────
0        Bayswater              Queens <MultiPolygon>
1         Allerton               Bronx <MultiPolygon>
2      City Island               Bronx <MultiPolygon>
3 Ditmars Steinway              Queens <MultiPolygon>
4       Ozone Park              Queens <MultiPolygon>
5          Fordham               Bronx <MultiPolygon>
6       Whitestone              Queens <MultiPolygon>
7    Arden Heights       Staten Island <MultiPolygon>
8         Arrochar       Staten Island <MultiPolygon>
9          Arverne              Queens <MultiPolygon>
.

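ListOfDicts

Read a JSON file into a list of dictionaries, add a computed key to each item with modify, then chain filter and sort calls in the same way as in the DataFrame example.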

>>> import dataiter as di
>>> data = di.read_json("data/listings.json")
>>> data = data.modify(price_per_guest=lambda x: x.price / x.guests)
>>> data.head()
[
  {
    "id": 2060,
    "hood": "Manhattan",
    "zipcode": "10040",
    "guests": 2,
    "sqft": null,
    "price": 100,
    "price_per_guest": 50.0
  },
  {
    "id": 2595,
    "hood": "Manhattan",
    "zipcode": "10018",
    "guests": 2,
    "sqft": null,
    "price": 225,
    "price_per_guest": 112.5
  },
  {
    "id": 3831,
    "hood": "Brooklyn",
    "zipcode": "11238",
    "guests": 3,
    "sqft": 500.0,
    "price": 89,
    "price_per_guest": 29.666666666666668
  }
]
>>> data.filter(hood="Manhattan").filter(guests=2).sort(price=1).head()
[
  {
    "id": 42279170,
    "hood": "Manhattan",
    "zipcode": "10013",
    "guests": 2,
    "sqft": null,
    "price": 0,
    "price_per_guest": 0.0
  },
  {
    "id": 42384530,
    "hood": "Manhattan",
    "zipcode": "10036",
    "guests": 2,
    "sqft": null,
    "price": 0,
    "price_per_guest": 0.0
  },
  {
    "id": 18835820,
    "hood": "Manhattan",
    "zipcode": "10021",
    "guests": 2,
    "sqft": null,
    "price": 10,
    "price_per_guest": 5.0
  }
]