e:[["$","div",null,{"className":"wide-content-wrapper podcast-content","children":[[["$","div","0",{"className":"pb-8"}],["$","div","1",{"className":"pb-8"}]],[["$","section",null,{"className":"flex flex-col md:flex-row gap-8","children":[["$","div",null,{"className":"flex justify-center md:justify-start","children":["$","div",null,{"className":"min-w-[240px] lg:min-w-[300px] max-w-[300px] max-h-[300px] aspect-square overflow-hidden rounded-md","children":["$","$La",null,{"src":"https://static.libsyn.com/p/assets/0/e/4/b/0e4bd71bb64c6e45/DS_-_New_Logo_assets_-_JL_DS_Logo_Stacked_-_Color_2.jpg","width":300,"height":300,"alt":"Building the howto100m Video Corpus - podcast episode cover","priority":true,"unoptimized":true}]}]}],["$","div",null,{"className":"flex flex-col w-full","children":[["$","h1",null,{"children":"Building the howto100m Video Corpus"}],[["$","div",null,{"className":"flex justify-center md:justify-start","children":["$","$L7",null,{"href":"/podcasts/651046e2-e265-505b-8cc5-be51fb4ff3c5","className":"items-center text-link-color text-center md:text-start","aria-label":"Go to podcast page","children":[["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","viewBox":"0 0 24 24","fill":"currentColor","aria-hidden":"true","data-slot":"icon","ref":"$undefined","aria-labelledby":"$undefined","className":"w-[1rem] h-[1rem] inline mr-1","children":[null,["$","path",null,{"d":"M8.25 4.5a3.75 3.75 0 1 1 7.5 0v8.25a3.75 3.75 0 1 1-7.5 0V4.5Z"}],["$","path",null,{"d":"M6 10.5a.75.75 0 0 1 .75.75v1.5a5.25 5.25 0 1 0 10.5 0v-1.5a.75.75 0 0 1 1.5 0v1.5a6.751 6.751 0 0 1-6 6.709v2.291h3a.75.75 0 0 1 0 1.5h-7.5a.75.75 0 0 1 0-1.5h3v-2.291a6.751 6.751 0 0 1-6-6.709v-1.5A.75.75 0 0 1 6 10.5Z"}]]}],["$","span",null,{"children":"Data Skeptic"}]]}]}],["$","div",null,{"className":"flex flex-row gap-2 text-sm items-center flex-wrap justify-center md:justify-start mt-1","children":[["$","span",null,{"children":"Aug 19, 2019"}],[["$","span",null,{"children":"•"}],["$","span",null,{"children":"23 min"}]],null,["$","span",null,{"children":"•"}],["$","span",null,{"children":["Transcript available on ",["$","$L7",null,{"href":"https://metacast.app","children":"Metacast"}]]}]]}],["$","div",null,{"className":"mx-auto md:mx-0 w-full max-w-[400px] mt-6","children":["$","$L1d",null,{"src":"https://traffic.libsyn.com/secure/dataskeptic/building-the-howto100m-video-corpus.mp3?dest-id=201630"}]}],["$","div",null,{"className":"flex flex-col md:flex-row flex-wrap gap-y-1 md:gap-x-3 justify-center md:justify-start items-center mt-6","children":[["$","div",null,{"className":"text-sm text-gray-600 dark:text-gray-400 text-center md:text-start","children":"Listen in podcast apps:"}],["$","div",null,{"className":"flex flex-row flex-wrap gap-3 justify-center md:justify-start","children":[["$","$L7",null,{"href":"https://metacast.app","className":"flex flex-row items-center gap-1 font-light text-base text-sm","children":[["$","$La",null,{"src":"/images/icons/icon-sm.jpg","className":"rounded-md","width":24,"height":24,"alt":"Listen on Metacast","aria-hidden":true,"priority":true}],["$","div",null,{"children":"Metacast"}]]}],["$","$L1e",null,{"episodeData":{"title":"Building the howto100m Video Corpus","episodeGuid":"651046e2_e265_505b_8cc5_be51fb4ff3c5_c4ffd9a0bb664aba968b8d234ca8c306","podcastGuid":"651046e2-e265-505b-8cc5-be51fb4ff3c5","durationSeconds":1358,"imageUrl":"$undefined","seasonNum":"$undefined","episodeNum":"$undefined","publishedAt":"$D2019-08-19T20:12:43.000Z","description":"

Video annotation is an expensive and time-consuming process. As a consequence, the available video datasets are useful but small. The availability of machine transcribed explainer videos offers a unique opportunity to rapidly develop a useful, if dirty, corpus of videos that are \"self annotating\", as hosts explain the actions they are taking on the screen.

This episode is a discussion of the HowTo100m dataset - a project which has assembled a video corpus of 136M video clips with captions covering 23k activities.

Related Links

The paper will be presented at ICCV 2019

@antoine77340

Antoine on Github

Antoine's homepage

","enclosureUrl":"https://traffic.libsyn.com/secure/dataskeptic/building-the-howto100m-video-corpus.mp3?dest-id=201630"},"podcastData":{"artworkUrl":"https://static.libsyn.com/p/assets/0/e/4/b/0e4bd71bb64c6e45/DS_-_New_Logo_assets_-_JL_DS_Logo_Stacked_-_Color_2.jpg","description":"The Data Skeptic Podcast features interviews and discussion of topics related to data science, statistics, machine learning, artificial intelligence and the like, all from the perspective of applying critical thinking and the scientific method to evaluate the veracity of claims and efficacy of approaches.","latestEpisodePublishedAt":"$D2025-03-10T15:00:00.000Z","podcastGuid":"651046e2-e265-505b-8cc5-be51fb4ff3c5","iTunesCollectionId":890348705,"artistName":"Kyle Polich","title":"Data Skeptic","categories":["Technology","Science","Mathematics"],"feedUrl":"https://dataskeptic.libsyn.com/rss","url":"https://dataskeptic.com"},"isLoader":false}]]}]]}]]]}]]}],["$","section",null,{"className":"episode-description whitespace-pre-wrap space-y-3 mt-8 md:mt-12","children":[["$","h2",null,{"className":"text-2xl font-semibold dark:page-title-gradient-dark mt-12 text-center md:text-start mb-4","children":"Episode description"}],[["$","p","0",{"children":"Video annotation is an expensive and time-consuming process. As a consequence, the available video datasets are useful but small. The availability of machine transcribed explainer videos offers a unique opportunity to rapidly develop a useful, if dirty, corpus of videos that are \"self annotating\", as hosts explain the actions they are taking on the screen."}]," ",["$","p","2",{"children":["This episode is a discussion of the ",["$","a","1",{"href":"https://www.di.ens.fr/willow/research/howto100m/","children":"HowTo100m"}]," dataset - a project which has assembled a video corpus of 136M video clips with captions covering 23k activities."]}]," Related Links ",["$","p","4",{"children":["The paper will be presented at ",["$","a","1",{"href":"http://iccv2019.thecvf.com/","children":"ICCV 2019"}]]}]," ",["$","p","6",{"children":["$","a",null,{"href":"https://twitter.com/antoine77340","children":"@antoine77340"}]}]," ",["$","p","8",{"children":["$","a",null,{"href":"https://github.com/antoine77340","children":"Antoine on Github"}]}]," ",["$","p","10",{"children":["$","a",null,{"href":"https://www.di.ens.fr/~miech/","children":"Antoine's homepage"}]}]]]}]],[["$","div","0",{"className":"pb-8"}],["$","div","1",{"className":"pb-8"}]]]}],["$","section",null,{"children":["$","script",null,{"type":"application/ld+json","dangerouslySetInnerHTML":{"__html":"$1f"}}]}],"$undefined"]

‌

Episode description

Building the howto100m Video Corpus

Episode description