
Introduction to User-defined Functions

Explore how to write user-defined functions in PySpark to extend data transformations beyond the built-in methods. Learn how to translate Python data structures into PySpark types, decorate functions with type annotations, and apply them efficiently to PySpark DataFrames.


Overview

The majority of the use cases we encounter in day-to-day analysis or data engineering work can be solved with the methods and functions provided by PySpark's SQL or DataFrame API. When the built-in methods are not enough, we can write our own function and use it as a custom transformation. Writing user-defined functions requires a deeper understanding of how a pure Python data structure is represented in PySpark. The return type of a user-defined function (UDF) must be static, so we have to declare the return data structure ourselves as a PySpark type. Moreover, UDFs are among the most expensive (least optimized) operations in Spark, so we use them only when we have no other choice.
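As a rough illustration of this trade-off, the sketch below contrasts a built-in column function with a UDF whose return type is declared explicitly as a PySpark type. It assumes a local SparkSession and a small toy DataFrame; the column names and the double_score function are hypothetical, not part of this lesson's dataset.

```python
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.types import IntegerType

# Hypothetical session and data, used only for illustration.
spark = SparkSession.builder.master("local[*]").getOrCreate()
df = spark.createDataFrame([("alice", 3), ("bob", 12)], ["name", "score"])

# Preferred: a built-in function, which Spark can fully optimize.
df_builtin = df.withColumn("name_upper", F.upper("name"))

# Fallback: a UDF. The return type must be declared as a PySpark type
# (IntegerType here) because Spark cannot infer it from the Python code.
def double_score(score):
    return score * 2

double_score_udf = F.udf(double_score, returnType=IntegerType())
df_udf = df.withColumn("score_doubled", double_score_udf("score"))
```

Both calls produce a new column, but the built-in version stays inside Spark's optimized execution engine, while the UDF has to ship each value to a Python process and back.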

User-defined functions

PySpark is the Python API for Apache Spark, a large-scale data processing engine written in Scala. Before we define any UDFs, we need to go over the types PySpark provides and how they map to the underlying Scala/JVM objects. A UDF written in PySpark runs as Python code, but the values it receives and returns have to be converted between Python objects and Spark's JVM representation, which is why every UDF must declare its return type using PySpark types. By the end of this chapter, we should be able to do the following:

  • Translate a Python data structure into a data structure that is compatible with a PySpark DataFrame.

  • Decorate a pure Python function with the proper PySpark type annotation.

  • Apply a UDF to a PySpark DataFrame, as sketched in the example after this list.
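To preview how these three steps fit together, here is a minimal sketch, again assuming a local SparkSession; the split_names function and the sample data are hypothetical. It maps a Python list of strings to ArrayType(StringType()), attaches that PySpark type with the @udf decorator, and applies the result to a DataFrame column.

```python
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import ArrayType, StringType

spark = SparkSession.builder.master("local[*]").getOrCreate()
df = spark.createDataFrame([("alice bob",), ("carol",)], ["names"])

# 1. A Python list of strings corresponds to ArrayType(StringType()) in PySpark.
# 2. The @udf decorator attaches that PySpark return type to the function.
@udf(returnType=ArrayType(StringType()))
def split_names(names):
    return names.split(" ")

# 3. Apply the UDF to a DataFrame column like any built-in function.
df.withColumn("name_list", split_names("names")).show(truncate=False)
```

The rest of the chapter walks through each of these steps in detail.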