Strip Plot with Jitter#

In this chart, we encode the Major_Genre column from the movies dataset in the y-channel. In the default presentation of this data, it would be difficult to gauge the relative frequencies with which different values occur because there would be so much overlap. To address this, we use the yOffset channel to incorporate a random offset (jittering). The example is shown twice, on the left side using normally distributed and on the right side using uniformally distributed jitter.

import altair as alt
from altair.datasets import data

source = data.movies.url

gaussian_jitter = alt.Chart(source, title='Normally distributed jitter').mark_circle(size=8).encode(
    y="Major Genre:N",
    x="IMDB Rating:Q",
    yOffset="jitter:Q",
    color=alt.Color('Major Genre:N').legend(None)
).transform_calculate(
    # Generate Gaussian jitter with a Box-Muller transform
    jitter="sqrt(-2*log(random()))*cos(2*PI*random())"
)

uniform_jitter = gaussian_jitter.transform_calculate(
    # Generate uniform jitter
    jitter='random()'
).encode(
    alt.Y('Major Genre:N').axis(None)
).properties(
    title='Uniformly distributed jitter'
)

(gaussian_jitter | uniform_jitter).resolve_scale(yOffset='independent')
import altair as alt
from altair.datasets import data

source = data.movies.url

gaussian_jitter = alt.Chart(source, title='Normally distributed jitter').mark_circle(size=8).encode(
    y="Major Genre:N",
    x="IMDB Rating:Q",
    yOffset="jitter:Q",
    color=alt.Color('Major Genre:N', legend=None)
).transform_calculate(
    # Generate Gaussian jitter with a Box-Muller transform
    jitter="sqrt(-2*log(random()))*cos(2*PI*random())"
)

uniform_jitter = gaussian_jitter.transform_calculate(
    # Generate uniform jitter
    jitter='random()'
).encode(
    y=alt.Y('Major Genre:N', axis=None)
).properties(
    title='Uniformly distributed jitter'
)

(gaussian_jitter | uniform_jitter).resolve_scale(yOffset='independent')