Spaces:
				
			
			
	
			
			
					
		Running
		
			on 
			
			CPU Upgrade
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
			on 
			
			CPU Upgrade
	adding citations
Browse files
- content.py +58 -0
    	
        content.py
    CHANGED
    
    | @@ -68,5 +68,63 @@ CITATION_BUTTON_TEXT = r"""@misc{open-llm-leaderboard, | |
| 68 | 
             
              publisher = {Hugging Face},
         | 
| 69 | 
             
              howpublished = "\url{https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard}"
         | 
| 70 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 71 | 
             
            }"""
         | 
| 72 |  | 
|  | |
| 68 | 
             
              publisher = {Hugging Face},
         | 
| 69 | 
             
              howpublished = "\url{https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard}"
         | 
| 70 |  | 
| 71 | 
            +
            }
         | 
| 72 | 
            +
            @software{eval-harness,
         | 
| 73 | 
            +
              author       = {Gao, Leo and
         | 
| 74 | 
            +
                              Tow, Jonathan and
         | 
| 75 | 
            +
                              Biderman, Stella and
         | 
| 76 | 
            +
                              Black, Sid and
         | 
| 77 | 
            +
                              DiPofi, Anthony and
         | 
| 78 | 
            +
                              Foster, Charles and
         | 
| 79 | 
            +
                              Golding, Laurence and
         | 
| 80 | 
            +
                              Hsu, Jeffrey and
         | 
| 81 | 
            +
                              McDonell, Kyle and
         | 
| 82 | 
            +
                              Muennighoff, Niklas and
         | 
| 83 | 
            +
                              Phang, Jason and
         | 
| 84 | 
            +
                              Reynolds, Laria and
         | 
| 85 | 
            +
                              Tang, Eric and
         | 
| 86 | 
            +
                              Thite, Anish and
         | 
| 87 | 
            +
                              Wang, Ben and
         | 
| 88 | 
            +
                              Wang, Kevin and
         | 
| 89 | 
            +
                              Zou, Andy},
         | 
| 90 | 
            +
              title        = {A framework for few-shot language model evaluation},
         | 
| 91 | 
            +
              month        = sep,
         | 
| 92 | 
            +
              year         = 2021,
         | 
| 93 | 
            +
              publisher    = {Zenodo},
         | 
| 94 | 
            +
              version      = {v0.0.1},
         | 
| 95 | 
            +
              doi          = {10.5281/zenodo.5371628},
         | 
| 96 | 
            +
              url          = {https://doi.org/10.5281/zenodo.5371628}
         | 
| 97 | 
            +
            }
         | 
| 98 | 
            +
            @misc{clark2018think,
         | 
| 99 | 
            +
                  title={Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge}, 
         | 
| 100 | 
            +
                  author={Peter Clark and Isaac Cowhey and Oren Etzioni and Tushar Khot and Ashish Sabharwal and Carissa Schoenick and Oyvind Tafjord},
         | 
| 101 | 
            +
                  year={2018},
         | 
| 102 | 
            +
                  eprint={1803.05457},
         | 
| 103 | 
            +
                  archivePrefix={arXiv},
         | 
| 104 | 
            +
                  primaryClass={cs.AI}
         | 
| 105 | 
            +
            }
         | 
| 106 | 
            +
            @misc{zellers2019hellaswag,
         | 
| 107 | 
            +
                  title={HellaSwag: Can a Machine Really Finish Your Sentence?}, 
         | 
| 108 | 
            +
                  author={Rowan Zellers and Ari Holtzman and Yonatan Bisk and Ali Farhadi and Yejin Choi},
         | 
| 109 | 
            +
                  year={2019},
         | 
| 110 | 
            +
                  eprint={1905.07830},
         | 
| 111 | 
            +
                  archivePrefix={arXiv},
         | 
| 112 | 
            +
                  primaryClass={cs.CL}
         | 
| 113 | 
            +
            }
         | 
| 114 | 
            +
            @misc{hendrycks2021measuring,
         | 
| 115 | 
            +
                  title={Measuring Massive Multitask Language Understanding}, 
         | 
| 116 | 
            +
                  author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
         | 
| 117 | 
            +
                  year={2021},
         | 
| 118 | 
            +
                  eprint={2009.03300},
         | 
| 119 | 
            +
                  archivePrefix={arXiv},
         | 
| 120 | 
            +
                  primaryClass={cs.CY}
         | 
| 121 | 
            +
            }
         | 
| 122 | 
            +
            @misc{lin2022truthfulqa,
         | 
| 123 | 
            +
                  title={TruthfulQA: Measuring How Models Mimic Human Falsehoods}, 
         | 
| 124 | 
            +
                  author={Stephanie Lin and Jacob Hilton and Owain Evans},
         | 
| 125 | 
            +
                  year={2022},
         | 
| 126 | 
            +
                  eprint={2109.07958},
         | 
| 127 | 
            +
                  archivePrefix={arXiv},
         | 
| 128 | 
            +
                  primaryClass={cs.CL}
         | 
| 129 | 
             
            }"""
         | 
| 130 |  | 

