mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2026-02-19 14:13:22 +02:00
Compare commits
11 Commits
b2573
...
gg/authors
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
072e0a4d3b | ||
|
|
0e0d4e821f | ||
|
|
805d705032 | ||
|
|
37e7854c10 | ||
|
|
c342d070c6 | ||
|
|
f7fc5f6c6f | ||
|
|
ba0c7c70ab | ||
|
|
d48ccf3ad4 | ||
|
|
069574775c | ||
|
|
cfde806eb9 | ||
|
|
b910287954 |
@@ -1,5 +1,5 @@
|
||||
# SRPM for building from source and packaging an RPM for RPM-based distros.
|
||||
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
|
||||
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
|
||||
# Built and maintained by John Boero - boeroboy@gmail.com
|
||||
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# SRPM for building from source and packaging an RPM for RPM-based distros.
|
||||
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
|
||||
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
|
||||
# Built and maintained by John Boero - boeroboy@gmail.com
|
||||
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# SRPM for building from source and packaging an RPM for RPM-based distros.
|
||||
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
|
||||
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
|
||||
# Built and maintained by John Boero - boeroboy@gmail.com
|
||||
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
|
||||
|
||||
|
||||
3
.github/workflows/bench.yml
vendored
3
.github/workflows/bench.yml
vendored
@@ -25,7 +25,7 @@ on:
|
||||
branches:
|
||||
- master
|
||||
paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*']
|
||||
pull_request:
|
||||
pull_request_target:
|
||||
types: [opened, synchronize, reopened]
|
||||
paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*']
|
||||
schedule:
|
||||
@@ -143,7 +143,6 @@ jobs:
|
||||
|
||||
- name: Commit status
|
||||
uses: Sibz/github-status-action@v1
|
||||
continue-on-error: true # If not authorized on external repo
|
||||
with:
|
||||
authToken: ${{secrets.GITHUB_TOKEN}}
|
||||
sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
|
||||
|
||||
655
AUTHORS
Normal file
655
AUTHORS
Normal file
@@ -0,0 +1,655 @@
|
||||
# date: Tue Apr 9 09:17:14 EEST 2024
|
||||
# this file is auto-generated by scripts/gen-authors.sh
|
||||
|
||||
0cc4m <picard12@live.de>
|
||||
0xspringtime <110655352+0xspringtime@users.noreply.github.com>
|
||||
2f38b454 <dxf@protonmail.com>
|
||||
3ooabkhxtn <31479382+3ooabkhxtn@users.noreply.github.com>
|
||||
44670 <44670@users.noreply.github.com>
|
||||
AN Long <aisk@users.noreply.github.com>
|
||||
AT <manyoso@users.noreply.github.com>
|
||||
Aarni Koskela <akx@iki.fi>
|
||||
Aaron Miller <apage43@ninjawhale.com>
|
||||
Aaryaman Vasishta <aaryaman.vasishta@amd.com>
|
||||
Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
|
||||
Abhishek Gopinath K <31348521+overtunned@users.noreply.github.com>
|
||||
Adithya Balaji <adithya.b94@gmail.com>
|
||||
AdithyanI <adithyan.i4internet@gmail.com>
|
||||
Adrian <smith.adriane@gmail.com>
|
||||
Adrian Hesketh <a-h@users.noreply.github.com>
|
||||
AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
|
||||
Aisuko <urakiny@gmail.com>
|
||||
Alberto <57916483+albbus-stack@users.noreply.github.com>
|
||||
Alex <awhill19@icloud.com>
|
||||
Alex Azarov <alex@azarov.by>
|
||||
Alex Azarov <alexander.azarov@mapbox.com>
|
||||
Alex Klinkhamer <from.github.com.917@grencez.dev>
|
||||
Alex Klinkhamer <git@grencez.dev>
|
||||
Alex Nguyen <tiendung@users.noreply.github.com>
|
||||
Alex Petenchea <alex.petenchea@gmail.com>
|
||||
Alex Renda <alexrenda@users.noreply.github.com>
|
||||
Alex von Gluck IV <kallisti5@unixzen.com>
|
||||
Alexey Parfenov <zxed@alkatrazstudio.net>
|
||||
Ali Chraghi <63465728+alichraghi@users.noreply.github.com>
|
||||
Ali Nehzat <ali.nehzat@thanks.dev>
|
||||
Ali Tariq <ali.tariq@10xengineers.ai>
|
||||
Alon <alonfaraj@gmail.com>
|
||||
AlpinDale <52078762+AlpinDale@users.noreply.github.com>
|
||||
AmirAli Mirian <37371367+amiralimi@users.noreply.github.com>
|
||||
Ananta Bastola <anantarajbastola@gmail.com>
|
||||
Anas Ahouzi <112881240+aahouzi@users.noreply.github.com>
|
||||
András Salamon <ott2@users.noreply.github.com>
|
||||
Andrei <abetlen@gmail.com>
|
||||
Andrew Canis <andrew.canis@gmail.com>
|
||||
Andrew Duffy <a10y@users.noreply.github.com>
|
||||
Andrew Godfrey <AndrewGodfrey@users.noreply.github.com>
|
||||
Arik Poznanski <arikpoz@users.noreply.github.com>
|
||||
Artem <guinmoon@gmail.com>
|
||||
Artyom Lebedev <vagran.ast@gmail.com>
|
||||
Asbjørn Olling <asbjornolling@gmail.com>
|
||||
Ásgeir Bjarni Ingvarsson <asgeir@fundinn.org>
|
||||
Ashok Gelal <401055+ashokgelal@users.noreply.github.com>
|
||||
Ashraful Islam <ashraful.meche@gmail.com>
|
||||
Atsushi Tatsuma <yoshoku@outlook.com>
|
||||
Austin <77757836+teleprint-me@users.noreply.github.com>
|
||||
AustinMroz <austinmroz@utexas.edu>
|
||||
BADR <contact@pythops.com>
|
||||
Bach Le <bach@bullno1.com>
|
||||
Bailey Chittle <39804642+bachittle@users.noreply.github.com>
|
||||
BarfingLemurs <128182951+BarfingLemurs@users.noreply.github.com>
|
||||
Behnam M <58621210+ibehnam@users.noreply.github.com>
|
||||
Ben Garney <bengarney@users.noreply.github.com>
|
||||
Ben Siraphob <bensiraphob@gmail.com>
|
||||
Ben Williams <ben@719ben.com>
|
||||
Benjamin Lecaillon <84293038+blecaillon@users.noreply.github.com>
|
||||
Bernat Vadell <hounter.caza@gmail.com>
|
||||
Bodo Graumann <mail@bodograumann.de>
|
||||
Bono Lv <lvscar@users.noreply.github.com>
|
||||
Borislav Stanimirov <b.stanimirov@abv.bg>
|
||||
Branden Butler <bwtbutler@hotmail.com>
|
||||
Brian <mofosyne@gmail.com>
|
||||
Bruce MacDonald <brucewmacdonald@gmail.com>
|
||||
CJ Pais <cj@cjpais.com>
|
||||
CRD716 <crd716@gmail.com>
|
||||
Cameron <csteele@steelecameron.com>
|
||||
Cameron Kaiser <classilla@users.noreply.github.com>
|
||||
Casey Primozic <casey@cprimozic.net>
|
||||
Casey Primozic <me@ameo.link>
|
||||
CausalLM <148736309+CausalLM@users.noreply.github.com>
|
||||
Cebtenzzre <cebtenzzre@gmail.com>
|
||||
Chad Brewbaker <crb002@gmail.com>
|
||||
Cheng Shao <terrorjack@type.dance>
|
||||
Chris Kuehl <ckuehl@ckuehl.me>
|
||||
Christian Demsar <christian@github.email.demsar.us>
|
||||
Christian Demsar <crasm@git.vczf.us>
|
||||
Christian Falch <875252+chrfalch@users.noreply.github.com>
|
||||
Christian Kögler <ck3d@gmx.de>
|
||||
Clark Saben <76020733+csaben@users.noreply.github.com>
|
||||
Clint Herron <hanclinto@gmail.com>
|
||||
Cuong Trinh Manh <nguoithichkhampha@gmail.com>
|
||||
DAN™ <dranger003@gmail.com>
|
||||
Damian Stewart <d@damianstewart.com>
|
||||
Dane Madsen <dane_madsen@hotmail.com>
|
||||
DaniAndTheWeb <57776841+DaniAndTheWeb@users.noreply.github.com>
|
||||
Daniel Bevenius <daniel.bevenius@gmail.com>
|
||||
Daniel Drake <drake@endlessos.org>
|
||||
Daniel Hiltgen <dhiltgen@users.noreply.github.com>
|
||||
Daniel Illescas Romero <illescas.daniel@protonmail.com>
|
||||
DannyDaemonic <DannyDaemonic@gmail.com>
|
||||
Dat Quoc Nguyen <2412555+datquocnguyen@users.noreply.github.com>
|
||||
Dave Della Costa <ddellacosta+github@gmail.com>
|
||||
David Friehs <david@friehs.info>
|
||||
David Kennedy <dakennedyd@gmail.com>
|
||||
David Pflug <david@pflug.email>
|
||||
David Renshaw <dwrenshaw@gmail.com>
|
||||
David Sommers <12738+databyte@users.noreply.github.com>
|
||||
David Yang <davidyang6us@gmail.com>
|
||||
Dawid Wysocki <62249621+TortillaZHawaii@users.noreply.github.com>
|
||||
Dean <Dean.Sinaean@gmail.com>
|
||||
Deins <deinsegle@gmail.com>
|
||||
Didzis Gosko <didzis@users.noreply.github.com>
|
||||
Don Mahurin <dmahurin@users.noreply.github.com>
|
||||
DooWoong Lee (David) <manics99@naver.com>
|
||||
Doomsdayrs <38189170+Doomsdayrs@users.noreply.github.com>
|
||||
Douglas Hanley <thesecretaryofwar@gmail.com>
|
||||
Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
|
||||
Ebey Abraham <ebey97@gmail.com>
|
||||
Ed Lee <edilee@mozilla.com>
|
||||
Ed Lepedus <ed.lepedus@googlemail.com>
|
||||
Edward Taylor <edeetee@gmail.com>
|
||||
Elbios <141279586+Elbios@users.noreply.github.com>
|
||||
Engininja2 <139037756+Engininja2@users.noreply.github.com>
|
||||
Equim <sayaka@ekyu.moe>
|
||||
Eric Sommerlade <es0m@users.noreply.github.com>
|
||||
Eric Zhang <34133756+EZForever@users.noreply.github.com>
|
||||
Erik Garrison <erik.garrison@gmail.com>
|
||||
Erik Scholz <Green-Sky@users.noreply.github.com>
|
||||
Ettore Di Giacinto <mudler@users.noreply.github.com>
|
||||
Evan Jones <evan.q.jones@gmail.com>
|
||||
Evan Miller <emmiller@gmail.com>
|
||||
Eve <139727413+netrunnereve@users.noreply.github.com>
|
||||
Evgeny Kurnevsky <kurnevsky@gmail.com>
|
||||
Ewout ter Hoeven <E.M.terHoeven@student.tudelft.nl>
|
||||
ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com>
|
||||
FK <sozforex@gmail.com>
|
||||
Fabian <cmdrf@users.noreply.github.com>
|
||||
Fabio R. Sluzala <Fabio3rs@users.noreply.github.com>
|
||||
Faez Shakil <faez.shakil@gmail.com>
|
||||
FantasyGmm <16450052+FantasyGmm@users.noreply.github.com>
|
||||
Fattire <528174+fat-tire@users.noreply.github.com>
|
||||
Felix <stenbackfelix@gmail.com>
|
||||
Finn Voorhees <finnvoorhees@gmail.com>
|
||||
Firat <firatkiral@gmail.com>
|
||||
Folko-Ven <71110216+Folko-Ven@users.noreply.github.com>
|
||||
Foul-Tarnished <107711110+Foul-Tarnished@users.noreply.github.com>
|
||||
Francisco Melo <43780565+francis2tm@users.noreply.github.com>
|
||||
FrankHB <frankhb1989@gmail.com>
|
||||
Frederik Vogel <Schaltfehler@users.noreply.github.com>
|
||||
Gabe Goodhart <gabe.l.hart@gmail.com>
|
||||
GainLee <perfecter.gen@gmail.com>
|
||||
Galunid <karolek1231456@gmail.com>
|
||||
Gary Linscott <glinscott@gmail.com>
|
||||
Gary Mulder <gjmulder@gmail.com>
|
||||
Genkagaku.GPT <hlhr202@163.com>
|
||||
Georgi Gerganov <ggerganov@gmail.com>
|
||||
Gilad S <giladgd@users.noreply.github.com>
|
||||
GiviMAD <GiviMAD@users.noreply.github.com>
|
||||
Govlzkoy <gotope@users.noreply.github.com>
|
||||
Guillaume "Vermeille" Sanchez <Guillaume.V.Sanchez@gmail.com>
|
||||
Guillaume Wenzek <gwenzek@users.noreply.github.com>
|
||||
Guoteng <32697156+SolenoidWGT@users.noreply.github.com>
|
||||
Gustavo Rocha Dias <91472747+gustrd@users.noreply.github.com>
|
||||
Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
|
||||
Haohui Mai <ricetons@gmail.com>
|
||||
Haoxiang Fei <tonyfettes@tonyfettes.com>
|
||||
Harald Fernengel <harald.fernengel@here.com>
|
||||
Hatsune Miku <129688334+at8u@users.noreply.github.com>
|
||||
Henk Poley <HenkPoley@gmail.com>
|
||||
Henri Vasserman <henv@hot.ee>
|
||||
Henrik Forstén <henrik.forsten@gmail.com>
|
||||
Herman Semenov <GermanAizek@yandex.ru>
|
||||
Hesen Peng <hesen.peng@gmail.com>
|
||||
Hoang Nguyen <hugo53@users.noreply.github.com>
|
||||
Hongyu Ouyang <96765450+casavaca@users.noreply.github.com>
|
||||
Howard Su <howard0su@gmail.com>
|
||||
Hua Jiang <allenhjiang@outlook.com>
|
||||
Huawei Lin <huaweilin.cs@gmail.com>
|
||||
Ian Bull <irbull@eclipsesource.com>
|
||||
Ian Bull <irbull@gmail.com>
|
||||
Ian Scrivener <github@zilogy.asia>
|
||||
Ido S <ido.pluto@gmail.com>
|
||||
IgnacioFDM <ignaciofdm@gmail.com>
|
||||
Igor Okulist <okigan@gmail.com>
|
||||
Ikko Eltociear Ashimine <eltociear@gmail.com>
|
||||
Ilya Kurdyukov <59548320+ilyakurdyukov@users.noreply.github.com>
|
||||
Ionoclast Laboratories <brigham@ionoclast.com>
|
||||
Isaac McFadyen <isaac@imcf.me>
|
||||
IsaacDynamo <61521674+IsaacDynamo@users.noreply.github.com>
|
||||
Ivan Komarov <Ivan.Komarov@dfyz.info>
|
||||
Ivan Stepanov <ivanstepanovftw@gmail.com>
|
||||
JH23X <165871467+JH23X@users.noreply.github.com>
|
||||
Jack Mousseau <jmousseau@users.noreply.github.com>
|
||||
JackJollimore <130917767+JackJollimore@users.noreply.github.com>
|
||||
Jag Chadha <jagtesh@gmail.com>
|
||||
Jakub N <jakubniemczyk97@gmail.com>
|
||||
James Reynolds <magnusviri@users.noreply.github.com>
|
||||
Jan Boon <jan.boon@kaetemi.be>
|
||||
Jan Boon <kaetemi@gmail.com>
|
||||
Jan Ploski <jpl@plosquare.com>
|
||||
Jannis Schönleber <joennlae@gmail.com>
|
||||
Jared Van Bortel <cebtenzzre@gmail.com>
|
||||
Jared Van Bortel <jared@nomic.ai>
|
||||
Jason McCartney <jmac@theroot.org>
|
||||
Jean-Christophe Hoelt <hoelt@fovea.cc>
|
||||
Jean-Michaël Celerier <jeanmichael.celerier+github@gmail.com>
|
||||
Jed Fox <git@jedfox.com>
|
||||
Jeffrey Quesnelle <emozilla@nousresearch.com>
|
||||
Jesse Jojo Johnson <williamsaintgeorge@gmail.com>
|
||||
Jhen-Jie Hong <iainst0409@gmail.com>
|
||||
Jiahao Li <liplus17@163.com>
|
||||
Jian Liao <jianliao@users.noreply.github.com>
|
||||
JidongZhang-THU <1119708529@qq.com>
|
||||
Jinwoo Jeong <33892306+williamjeong2@users.noreply.github.com>
|
||||
Jiří Podivín <66251151+jpodivin@users.noreply.github.com>
|
||||
Johannes Gäßler <johannesg@5d6.de>
|
||||
Johannes Rudolph <johannes.rudolph@gmail.com>
|
||||
John <78893154+cmp-nct@users.noreply.github.com>
|
||||
John Balis <phobossystems@gmail.com>
|
||||
John Smith <67539080+kingsidelee@users.noreply.github.com>
|
||||
JohnnyB <jboero@users.noreply.github.com>
|
||||
Jonas Wunderlich <32615971+jonas-w@users.noreply.github.com>
|
||||
Jorge A <161275481+jorgealias@users.noreply.github.com>
|
||||
Jose Maldonado <63384398+yukiteruamano@users.noreply.github.com>
|
||||
Joseph Stahl <1269177+josephst@users.noreply.github.com>
|
||||
Joyce <joycebrum@google.com>
|
||||
Juan Calderon-Perez <835733+gaby@users.noreply.github.com>
|
||||
Judd <foldl@users.noreply.github.com>
|
||||
Julius Arkenberg <arki05@users.noreply.github.com>
|
||||
Jun Jie <71215065+junnjiee16@users.noreply.github.com>
|
||||
Juraj Bednar <juraj@bednar.io>
|
||||
Justin Parker <jparkerweb@gmail.com>
|
||||
Justin Suess <justin.suess@westpoint.edu>
|
||||
Justine Tunney <jtunney@gmail.com>
|
||||
Juuso Alasuutari <juuso.alasuutari@gmail.com>
|
||||
KASR <karim.asrih@gmail.com>
|
||||
Kamil Tomšík <info@tomsik.cz>
|
||||
Karsten Weiss <knweiss@gmail.com>
|
||||
Karthick <j.karthic2004@gmail.com>
|
||||
Karthik Kumar Viswanathan <195178+guilt@users.noreply.github.com>
|
||||
Karthik Sethuraman <k.seth1993@gmail.com>
|
||||
Kasumi <90275229+kasumi-1@users.noreply.github.com>
|
||||
Kawrakow <48489457+ikawrakow@users.noreply.github.com>
|
||||
Keiichi Tabata <keiichi.tabata@outlook.com>
|
||||
Kenvix ⭐ <kenvixzure@live.com>
|
||||
Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>
|
||||
Kevin Ji <1146876+kevinji@users.noreply.github.com>
|
||||
Kevin Kwok <antimatter15@gmail.com>
|
||||
Kevin Lo <kevlo@kevlo.org>
|
||||
Kolen Cheung <ickc@users.noreply.github.com>
|
||||
Konstantin Herud <konstantin.herud@denkbares.com>
|
||||
Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com>
|
||||
Kunshang Ji <kunshang.ji@intel.com>
|
||||
Kyle Liang <liangmanlai@gmail.com>
|
||||
Kyle Mistele <kyle@mistele.com>
|
||||
Kylin <56434533+KyL0N@users.noreply.github.com>
|
||||
Lars Grammel <lars.grammel@gmail.com>
|
||||
Laura <Tijntje_7@msn.com>
|
||||
Lee <44310445+lx200916@users.noreply.github.com>
|
||||
Lee Drake <b.lee.drake@gmail.com>
|
||||
Leng Yue <lengyue@lengyue.me>
|
||||
LeonEricsson <70749762+LeonEricsson@users.noreply.github.com>
|
||||
Leonardo Neumann <leonardo@neumann.dev.br>
|
||||
Li Tan <tanliboy@gmail.com>
|
||||
Linwei Wang <wanix1988@gmail.com>
|
||||
LoganDark <github@logandark.mozmail.com>
|
||||
LostRuins <39025047+LostRuins@users.noreply.github.com>
|
||||
Luciano <lucianostrika44@gmail.com>
|
||||
Luo Tian <lt@basecity.com>
|
||||
M. Yusuf Sarıgöz <yusufsarigoz@gmail.com>
|
||||
Maarten ter Huurne <maarten@treewalker.org>
|
||||
Mack Straight <eiz@users.noreply.github.com>
|
||||
Maël Kerbiriou <m431.kerbiriou@gmail.com>
|
||||
MaggotHATE <clay1326@gmail.com>
|
||||
Marc Köhlbrugge <subscriptions@marckohlbrugge.com>
|
||||
Marco Matthies <71844+marcom@users.noreply.github.com>
|
||||
Marcus Dunn <51931484+MarcusDunn@users.noreply.github.com>
|
||||
Marian Cepok <marian.cepok@gmail.com>
|
||||
Mark Fairbairn <thebaron88@gmail.com>
|
||||
Marko Tasic <mtasic85@gmail.com>
|
||||
Martin Krasser <krasserm@googlemail.com>
|
||||
Martin Schwaighofer <mschwaig@users.noreply.github.com>
|
||||
Marvin Gießing <marvin.giessing@gmail.com>
|
||||
Mateusz Charytoniuk <mateusz.charytoniuk@protonmail.com>
|
||||
Matheus C. França <matheus-catarino@hotmail.com>
|
||||
Matheus Gabriel Alves Silva <matheusgasource@gmail.com>
|
||||
Mathieu Nayrolles <MathieuNls@users.noreply.github.com>
|
||||
Mathijs de Bruin <mathijs@mathijsfietst.nl>
|
||||
Matt Clayton <156335168+mattjcly@users.noreply.github.com>
|
||||
Matt Pulver <matt.pulver@heavy.ai>
|
||||
Matteo Boschini <12133566+mbosc@users.noreply.github.com>
|
||||
Matthew Tejo <matthew.tejo@gmail.com>
|
||||
Matvey Soloviev <blackhole89@gmail.com>
|
||||
Maxime <672982+maximegmd@users.noreply.github.com>
|
||||
Maximilian Winter <maximilian.winter.91@gmail.com>
|
||||
Meng Zhang <meng@tabbyml.com>
|
||||
Meng, Hengyu <hengyu.meng@intel.com>
|
||||
Merrick Christensen <merrick.christensen@gmail.com>
|
||||
Michael Coppola <m18coppola@gmail.com>
|
||||
Michael Hueschen <m@mhueschen.dev>
|
||||
Michael Kesper <mkesper@schokokeks.org>
|
||||
Michael Klimenko <mklimenko29@gmail.com>
|
||||
Michael Podvitskiy <podvitskiymichael@gmail.com>
|
||||
Michael Potter <NanoTekGuy@Gmail.com>
|
||||
Michaël de Vries <vriesdemichael@gmail.com>
|
||||
Mihai <mihai.chirculescu@yahoo.com>
|
||||
Mike <ytianhui2004@gmail.com>
|
||||
Minsoo Cheong <54794500+mscheong01@users.noreply.github.com>
|
||||
Mirko185 <mirkosig@gmail.com>
|
||||
Mirror Azure <54669636+MirrorAzure@users.noreply.github.com>
|
||||
Miwa / Ensan <63481257+ensan-hcl@users.noreply.github.com>
|
||||
Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
|
||||
Murilo Santana <mvrilo@gmail.com>
|
||||
Musab Gultekin <musabgultekin@users.noreply.github.com>
|
||||
Nam D. Tran <42194884+namtranase@users.noreply.github.com>
|
||||
NawafAlansari <72708095+NawafAlansari@users.noreply.github.com>
|
||||
Nebula <infinitewormhole@gmail.com>
|
||||
Neo Zhang Jianyu <jianyu.zhang@intel.com>
|
||||
Neuman Vong <neuman.vong@gmail.com>
|
||||
Nexesenex <124105151+Nexesenex@users.noreply.github.com>
|
||||
Niall Coates <1349685+Niall-@users.noreply.github.com>
|
||||
Nicolai Weitkemper <kontakt@nicolaiweitkemper.de>
|
||||
Nigel Bosch <pnigelb@gmail.com>
|
||||
Niklas Korz <niklas@niklaskorz.de>
|
||||
Nindaleth <Nindaleth@users.noreply.github.com>
|
||||
Oleksandr Nikitin <oleksandr@tvori.info>
|
||||
Oleksii Maryshchenko <oleksii.maryshchenko@gmail.com>
|
||||
Olivier Chafik <ochafik@users.noreply.github.com>
|
||||
Ondřej Čertík <ondrej@certik.us>
|
||||
Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
|
||||
Paul Tsochantaris <ptsochantaris@icloud.com>
|
||||
Pavol Rusnak <pavol@rusnak.io>
|
||||
Pedro Cuenca <pedro@huggingface.co>
|
||||
Peter Sugihara <peter@campsh.com>
|
||||
Phil H <5756783+phiharri@users.noreply.github.com>
|
||||
Philip Taron <philip.taron@gmail.com>
|
||||
Phillip Kravtsov <phillip@kravtsov.net>
|
||||
Pierre Alexandre SCHEMBRI <pa.schembri@gmail.com>
|
||||
Pierrick Hymbert <pierrick.hymbert@gmail.com>
|
||||
Przemysław Pawełczyk <przemoc@gmail.com>
|
||||
Qin Yue Chen <71813199+chenqiny@users.noreply.github.com>
|
||||
Qingyou Meng <meng.qingyou@gmail.com>
|
||||
Qu Zongfu <43257352+yancaoweidaode@users.noreply.github.com>
|
||||
RJ Adriaansen <adriaansen@eshcc.eur.nl>
|
||||
Radoslav Gerganov <rgerganov@gmail.com>
|
||||
Radosław Gryta <radek.gryta@gmail.com>
|
||||
Rahul Vivek Nair <68507071+RahulVivekNair@users.noreply.github.com>
|
||||
Rand Xie <randxiexyy29@gmail.com>
|
||||
Randall Fitzgerald <randall@dasaku.net>
|
||||
Reinforce-II <fate@eastal.com>
|
||||
Riceball LEE <snowyu.lee@gmail.com>
|
||||
Richard Kiss <him@richardkiss.com>
|
||||
Richard Roberson <richardr1126@gmail.com>
|
||||
Rick G <26732651+TheFlipbook@users.noreply.github.com>
|
||||
Rickard Edén <rickardeden@gmail.com>
|
||||
Rickard Hallerbäck <rickard.hallerback@gmail.com>
|
||||
Rickey Bowers Jr <bitRAKE@gmail.com>
|
||||
Riley Stewart <ristew@users.noreply.github.com>
|
||||
Rinne <AsakusaRinne@gmail.com>
|
||||
Rinne <liu_yaohui1998@126.com>
|
||||
Robert Brisita <986796+rbrisita@users.noreply.github.com>
|
||||
Robert Sung-wook Shin <edp1096@users.noreply.github.com>
|
||||
Robey Holderith <robey@flaminglunchbox.net>
|
||||
Robyn <robyngraf@users.noreply.github.com>
|
||||
Roger Meier <r.meier@siemens.com>
|
||||
Roland <14355895+rbur0425@users.noreply.github.com>
|
||||
Romain D <90720+Artefact2@users.noreply.github.com>
|
||||
Romain Neutron <romain@neutron.io>
|
||||
Roman Parykin <donderom@gmail.com>
|
||||
Ron Evans <ron@hybridgroup.com>
|
||||
Ron Jailall <rojailal@gmail.com>
|
||||
Ronny Brendel <ronnybrendel@gmail.com>
|
||||
Ronsor <ronsor@ronsor.pw>
|
||||
Rowan Hart <rowanbhart@gmail.com>
|
||||
Rune <43761327+Rune-AI@users.noreply.github.com>
|
||||
Ryan Landay <rlanday@gmail.com>
|
||||
Ryder Wishart <ryderwishart@gmail.com>
|
||||
Rőczey Barnabás <31726601+An0nie@users.noreply.github.com>
|
||||
SakuraUmi <yukinon244@gmail.com>
|
||||
Salvador E. Tropea <stropea@inti.gob.ar>
|
||||
Sam Spilsbury <smspillaz@gmail.com>
|
||||
Sami Farin <3876865+Safari77@users.noreply.github.com>
|
||||
Samuel Maynard <samwmaynard@gmail.com>
|
||||
Sang-Kil Park <sang.park@42dot.ai>
|
||||
Seb C <47074056+Sebby37@users.noreply.github.com>
|
||||
Sebastián A <sebastian.aedo29@gmail.com>
|
||||
SebastianApel <13675545+SebastianApel@users.noreply.github.com>
|
||||
Senemu <10880819+Senemu@users.noreply.github.com>
|
||||
Sergey Alirzaev <zl29ah@gmail.com>
|
||||
Sergio López <slp@sinrega.org>
|
||||
SeungWon Jeong <65549245+redlion0929@users.noreply.github.com>
|
||||
ShadovvBeast <ShadovvBeast@gmail.com>
|
||||
Shakhar Dasgupta <shakhardasgupta@gmail.com>
|
||||
Shangning Xu <32517059+xushangning@users.noreply.github.com>
|
||||
Shijie <821898965@qq.com>
|
||||
Shintarou Okada <kokuzen@gmail.com>
|
||||
Shouzheng Liu <61452103+lshzh-ww@users.noreply.github.com>
|
||||
Shouzheng Liu <lshzh.hi@gmail.com>
|
||||
Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
|
||||
Simon Willison <swillison@gmail.com>
|
||||
Siwen Yu <yusiwen@gmail.com>
|
||||
Sky Yan <skyan83@gmail.com>
|
||||
Slaren <2141330+slaren@users.noreply.github.com>
|
||||
Slava Primenko <primenko.s@gmail.com>
|
||||
SoftwareRenderer <138734813+SoftwareRenderer@users.noreply.github.com>
|
||||
Someone <sergei.kozlukov@aalto.fi>
|
||||
Someone Serge <sergei.kozlukov@aalto.fi>
|
||||
Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com>
|
||||
Spencer Sutton <spencersutton@users.noreply.github.com>
|
||||
Srinivas Billa <nivibilla@gmail.com>
|
||||
Stefan Sydow <stefan@sydow.email>
|
||||
Stephan Walter <stephan@walter.name>
|
||||
Stephen Nichols <snichols@users.noreply.github.com>
|
||||
Steve Grubb <ausearch.1@gmail.com>
|
||||
Steven Roussey <sroussey@gmail.com>
|
||||
Steward Garcia <57494570+FSSRepo@users.noreply.github.com>
|
||||
Suaj Carrot <72162667+SuajCarrot@users.noreply.github.com>
|
||||
SuperUserNameMan <yoann@terminajones.com>
|
||||
Tai Duc Nguyen <taiducnguyen.drexel@gmail.com>
|
||||
Taikono-Himazin <kazu@po.harenet.ne.jp>
|
||||
Tameem <113388789+AhmadTameem@users.noreply.github.com>
|
||||
Tamotsu Takahashi <ttakah+github@gmail.com>
|
||||
Thái Hoàng Tâm <75922889+RoyalHeart@users.noreply.github.com>
|
||||
Thatcher Chamberlin <j.thatcher.c@gmail.com>
|
||||
Theia Vogel <theia@vgel.me>
|
||||
Thérence <13496987+Royalphax@users.noreply.github.com>
|
||||
Thibault Terrasson <thibault.terrasson@gmail.com>
|
||||
Thomas Klausner <wiz@gatalith.at>
|
||||
Tim Miller <drasticactions@users.noreply.github.com>
|
||||
Timmy Knight <r2d2fish@gmail.com>
|
||||
Timothy Cronin <40186632+4imothy@users.noreply.github.com>
|
||||
Ting Lou <ting.lou@gmail.com>
|
||||
Ting Sun <suntcrick@gmail.com>
|
||||
Tobias Lütke <tobi@shopify.com>
|
||||
Tom C <tom.corelis@gmail.com>
|
||||
Tom Jobbins <784313+TheBloke@users.noreply.github.com>
|
||||
Tomas <tom.tomas.36478119@gmail.com>
|
||||
Tomáš Pazdiora <tomas.pazdiora@gmail.com>
|
||||
Tristan Ross <rosscomputerguy@protonmail.com>
|
||||
Tungsten842 <886724vf@anonaddy.me>
|
||||
Tungsten842 <quantmint@protonmail.com>
|
||||
Tushar <ditsuke@protonmail.com>
|
||||
UEXTM.com <84163508+uextm@users.noreply.github.com>
|
||||
Uzo Nweke <uzoechi@gmail.com>
|
||||
Vaibhav Srivastav <vaibhavs10@gmail.com>
|
||||
Val Kharitonov <mail@kharvd.com>
|
||||
Valentin Konovalov <valle.ketsujin@gmail.com>
|
||||
Valentyn Bezshapkin <61702053+valentynbez@users.noreply.github.com>
|
||||
Victor Z. Peng <ziliangdotme@gmail.com>
|
||||
Vlad <spitfireage@gmail.com>
|
||||
Vladimir <bogdad@gmail.com>
|
||||
Vladimir Malyutin <first-leon@yandex.ru>
|
||||
Vladimir Zorin <vladimir@deviant.guru>
|
||||
Volodymyr Vitvitskyi <72226+signalpillar@users.noreply.github.com>
|
||||
WangHaoranRobin <56047610+WangHaoranRobin@users.noreply.github.com>
|
||||
Weird Constructor <weirdconstructor@gmail.com>
|
||||
Welby Seely <welbyseely@gmail.com>
|
||||
Wentai Zhang <rchardx@gmail.com>
|
||||
WillCorticesAI <150854901+WillCorticesAI@users.noreply.github.com>
|
||||
Willy Tarreau <w@1wt.eu>
|
||||
Wu Jian Ping <wujjpp@hotmail.com>
|
||||
Wu Jian Ping <wujp@greatld.com>
|
||||
Xiake Sun <xiake.sun@intel.com>
|
||||
Xiang (Kevin) Li <kevinli020508@gmail.com>
|
||||
Xiao-Yong Jin <jinxiaoyong@gmail.com>
|
||||
XiaotaoChen <chenxiaotao1234@gmail.com>
|
||||
Xiaoyi Chen <cxychina@gmail.com>
|
||||
Xingchen Song(宋星辰) <xingchensong1996@163.com>
|
||||
Xuan Son Nguyen <thichthat@gmail.com>
|
||||
Yann Follet <131855179+YannFollet@users.noreply.github.com>
|
||||
Yiming Cui <conandiy@vip.qq.com>
|
||||
Yishuo Wang <MeouSker77@outlook.com>
|
||||
Yueh-Po Peng <94939112+y10ab1@users.noreply.github.com>
|
||||
Yui <dev@sleepyyui.com>
|
||||
Yusuf Kağan Hanoğlu <hanoglu@yahoo.com>
|
||||
Yuval Peled <31162840+Yuval-Peled@users.noreply.github.com>
|
||||
ZHAOKAI WANG <sanxianwei@163.com>
|
||||
Zane Shannon <z@zcs.me>
|
||||
Zay <95888118+isaiahbjork@users.noreply.github.com>
|
||||
Zenix <zenixls2@gmail.com>
|
||||
Zhang Peiyuan <a1286225768@gmail.com>
|
||||
ZhouYuChen <zhouyuchen@naver.com>
|
||||
Ziad Ben Hadj-Alouane <zied.benhadjalouane@gmail.com>
|
||||
Ziang Wu <97337387+ZiangWu-77@users.noreply.github.com>
|
||||
Zsapi <martin1.zsapka@gmail.com>
|
||||
a-n-n-a-l-e-e <150648636+a-n-n-a-l-e-e@users.noreply.github.com>
|
||||
adel boussaken <netdur@gmail.com>
|
||||
afrideva <95653597+afrideva@users.noreply.github.com>
|
||||
akawrykow <142945436+akawrykow@users.noreply.github.com>
|
||||
alexpinel <93524949+alexpinel@users.noreply.github.com>
|
||||
alonfaraj <alonfaraj@gmail.com>
|
||||
andrijdavid <david@geek.mg>
|
||||
anon998 <131767832+anon998@users.noreply.github.com>
|
||||
anzz1 <anzz1@live.com>
|
||||
apaz <aarpazdera@gmail.com>
|
||||
apcameron <37645737+apcameron@users.noreply.github.com>
|
||||
arcrank <arcrank@gmail.com>
|
||||
arlo-phoenix <140345165+arlo-phoenix@users.noreply.github.com>
|
||||
at8u <129688334+at8u@users.noreply.github.com>
|
||||
automaticcat <daogiatuank54@gmail.com>
|
||||
bandoti <141645996+bandoti@users.noreply.github.com>
|
||||
beiller <beiller@gmail.com>
|
||||
bhubbb <79117352+bhubbb@users.noreply.github.com>
|
||||
bmwl <brian.marshall@tolko.com>
|
||||
bobqianic <129547291+bobqianic@users.noreply.github.com>
|
||||
bryanSwk <93190252+bryanSwk@users.noreply.github.com>
|
||||
bsilvereagle <bsilvereagle@users.noreply.github.com>
|
||||
bssrdf <merlintiger@hotmail.com>
|
||||
byte-6174 <88070277+byte-6174@users.noreply.github.com>
|
||||
cebtenzzre <cebtenzzre@gmail.com>
|
||||
chaihahaha <chai836275709@gmail.com>
|
||||
chiranko <96988916+chiranko@users.noreply.github.com>
|
||||
clibdev <52199778+clibdev@users.noreply.github.com>
|
||||
clyang <clyang@clyang.net>
|
||||
cocktailpeanut <121128867+cocktailpeanut@users.noreply.github.com>
|
||||
coezbek <c.oezbek@gmail.com>
|
||||
comex <comexk@gmail.com>
|
||||
compilade <113953597+compilade@users.noreply.github.com>
|
||||
crasm <crasm@git.vczf.net>
|
||||
crasm <crasm@git.vczf.us>
|
||||
daboe01 <daboe01@googlemail.com>
|
||||
david raistrick <keen99@users.noreply.github.com>
|
||||
ddpasa <112642920+ddpasa@users.noreply.github.com>
|
||||
deepdiffuser <112834445+deepdiffuser@users.noreply.github.com>
|
||||
divinity76 <divinity76@gmail.com>
|
||||
dotpy314 <33351922+dotpy314@users.noreply.github.com>
|
||||
drbh <david.richard.holtz@gmail.com>
|
||||
ds5t5 <145942675+ds5t5@users.noreply.github.com>
|
||||
dylan <canardleteer@users.noreply.github.com>
|
||||
eastriver <lee@eastriver.dev>
|
||||
ebraminio <ebraminio@gmail.com>
|
||||
eiery <19350831+eiery@users.noreply.github.com>
|
||||
eric8607242 <e0928021388@gmail.com>
|
||||
fraxy-v <65565042+fraxy-v@users.noreply.github.com>
|
||||
github-actions[bot] <github-actions[bot]@users.noreply.github.com>
|
||||
gliptic <gliptic@users.noreply.github.com>
|
||||
goerch <jhr.walter@t-online.de>
|
||||
grahameth <96447521+grahameth@users.noreply.github.com>
|
||||
gwjr <502526+gwjr@users.noreply.github.com>
|
||||
h-h-h-h <13482553+h-h-h-h@users.noreply.github.com>
|
||||
hankcs <cnhankmc@gmail.com>
|
||||
hoangmit <hoangmit@users.noreply.github.com>
|
||||
hongbo.mo <352280764@qq.com>
|
||||
howlger <eclipse@voormann.de>
|
||||
howlger <github@voormann.de>
|
||||
hutli <6594598+hutli@users.noreply.github.com>
|
||||
hutli <hutli@hutli.hu>
|
||||
hutli <jensstaermose@hotmail.com>
|
||||
hxer7963 <hxer7963@gmail.com>
|
||||
hydai <z54981220@gmail.com>
|
||||
iSma <ismail.senhaji@gmail.com>
|
||||
iacore <74560659+iacore@users.noreply.github.com>
|
||||
igarnier <igarnier@protonmail.com>
|
||||
iohub <rickyang.pro@gmail.com>
|
||||
jacobi petrucciani <8117202+jpetrucciani@users.noreply.github.com>
|
||||
jameswu2014 <545426914@qq.com>
|
||||
jneem <joeneeman@gmail.com>
|
||||
johnson442 <56517414+johnson442@users.noreply.github.com>
|
||||
jon-chuang <9093549+jon-chuang@users.noreply.github.com>
|
||||
jp-x-g <jpxg-dev@protonmail.com>
|
||||
jwj7140 <32943891+jwj7140@users.noreply.github.com>
|
||||
kaizau <kaizau@users.noreply.github.com>
|
||||
kalomaze <66376113+kalomaze@users.noreply.github.com>
|
||||
kang <tpdns9032100@gmail.com>
|
||||
katsu560 <118887472+katsu560@users.noreply.github.com>
|
||||
kchro3 <62481661+kchro3@users.noreply.github.com>
|
||||
khimaros <me@khimaros.com>
|
||||
kiltyj <kiltyj@gmail.com>
|
||||
klosax <131523366+klosax@users.noreply.github.com>
|
||||
kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com>
|
||||
kunnis <kunnis@users.noreply.github.com>
|
||||
kuronekosaiko <EvanChanJ@163.com>
|
||||
kuvaus <22169537+kuvaus@users.noreply.github.com>
|
||||
kwin1412 <42286931+kwin1412@users.noreply.github.com>
|
||||
l3utterfly <gc.pthzfoldr@gmail.com>
|
||||
ldwang <ftgreat@163.com>
|
||||
le.chang <cljs118@126.com>
|
||||
leejet <leejet714@gmail.com>
|
||||
limitedAtonement <limitedAtonement@users.noreply.github.com>
|
||||
lon <114724657+longregen@users.noreply.github.com>
|
||||
m3ndax <adrian.goessl@outlook.com>
|
||||
maddes8cht <55592906+maddes8cht@users.noreply.github.com>
|
||||
makomk <makosoft@googlemail.com>
|
||||
manikbhandari <mbbhandarimanik2@gmail.com>
|
||||
mdrokz <mohammadmunshi@gmail.com>
|
||||
mgroeber9110 <45620825+mgroeber9110@users.noreply.github.com>
|
||||
minarchist <minarchist@users.noreply.github.com>
|
||||
mj-shifu <77107165+mj-shifu@users.noreply.github.com>
|
||||
mmyjona <jonathan.gonse@gmail.com>
|
||||
momonga <115213907+mmnga@users.noreply.github.com>
|
||||
moritzbrantner <31051084+moritzbrantner@users.noreply.github.com>
|
||||
mzcu <milos.cubrilo@gmail.com>
|
||||
nanahi <130121847+na-na-hi@users.noreply.github.com>
|
||||
ngc92 <7938269+ngc92@users.noreply.github.com>
|
||||
nhamanasu <45545786+nhamanasu@users.noreply.github.com>
|
||||
niansa/tuxifan <anton-sa@web.de>
|
||||
niansa/tuxifan <tuxifan@posteo.de>
|
||||
ningshanwutuobang <ningshanwutuobang@gmail.com>
|
||||
nold <Nold360@users.noreply.github.com>
|
||||
nopperl <54780682+nopperl@users.noreply.github.com>
|
||||
nusu-github <29514220+nusu-github@users.noreply.github.com>
|
||||
olexiyb <olexiyb@gmail.com>
|
||||
oobabooga <112222186+oobabooga@users.noreply.github.com>
|
||||
opparco <parco.opaai@gmail.com>
|
||||
ostix360 <55257054+ostix360@users.noreply.github.com>
|
||||
perserk <perserk@gmail.com>
|
||||
postmasters <namnguyen@google.com>
|
||||
pudepiedj <pudepiedj@gmail.com>
|
||||
qingfengfenga <41416092+qingfengfenga@users.noreply.github.com>
|
||||
qouoq <qouoq@fastmail.com>
|
||||
qunash <anzoria@gmail.com>
|
||||
rabidcopy <rabidcopy@yahoo.com>
|
||||
rankaiyx <rankaiyx@rankaiyx.com>
|
||||
rhjdvsgsgks <26178113+rhjdvsgsgks@users.noreply.github.com>
|
||||
rhuddleston <ryan.huddleston@percona.com>
|
||||
rimoliga <53384203+rimoliga@users.noreply.github.com>
|
||||
runfuture <runfuture@users.noreply.github.com>
|
||||
sandyiscool <sandyiscool@gmail.com>
|
||||
semidark <me@semidark.net>
|
||||
sharpHL <132747147+sharpHL@users.noreply.github.com>
|
||||
shibe2 <shibe@tuta.io>
|
||||
singularity <12184989+singularity-s0@users.noreply.github.com>
|
||||
sjinzh <sjinzh@gmail.com>
|
||||
slaren <2141330+slaren@users.noreply.github.com>
|
||||
slaren <slarengh@gmail.com>
|
||||
snadampal <87143774+snadampal@users.noreply.github.com>
|
||||
staviq <staviq@gmail.com>
|
||||
stduhpf <stephduh@live.fr>
|
||||
swittk <switt1995@gmail.com>
|
||||
takov751 <40316768+takov751@users.noreply.github.com>
|
||||
tarcey <cey.tarik@gmail.com>
|
||||
texmex76 <40733439+texmex76@users.noreply.github.com>
|
||||
thement <40525767+thement@users.noreply.github.com>
|
||||
tjohnman <tjohnman@users.noreply.github.com>
|
||||
tslmy <tslmy@users.noreply.github.com>
|
||||
ubik2 <ubik2@users.noreply.github.com>
|
||||
uint256_t <konndennsa@gmail.com>
|
||||
uint256_t <maekawatoshiki1017@gmail.com>
|
||||
unbounded <haakon@likedan.net>
|
||||
valiray <133289098+valiray@users.noreply.github.com>
|
||||
vodkaslime <646329483@qq.com>
|
||||
vvhg1 <94630311+vvhg1@users.noreply.github.com>
|
||||
vxiiduu <73044267+vxiiduu@users.noreply.github.com>
|
||||
wbpxre150 <100937007+wbpxre150@users.noreply.github.com>
|
||||
whoreson <139810751+whoreson@users.noreply.github.com>
|
||||
wonjun Jang <strutive07@gmail.com>
|
||||
wzy <32936898+Freed-Wu@users.noreply.github.com>
|
||||
xaedes <xaedes@gmail.com>
|
||||
xaedes <xaedes@googlemail.com>
|
||||
xloem <0xloem@gmail.com>
|
||||
yangli2 <yangli2@gmail.com>
|
||||
yuiseki <yuiseki@gmail.com>
|
||||
zakkor <edward.partenie@gmail.com>
|
||||
zhouwg <6889919+zhouwg@users.noreply.github.com>
|
||||
zrm <trustiosity.zrm@gmail.com>
|
||||
源文雨 <41315874+fumiama@users.noreply.github.com>
|
||||
Нияз Гарифзянов <112617865+garrnizon@users.noreply.github.com>
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023 Georgi Gerganov
|
||||
Copyright (c) 2023-2024 The ggml authors
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
11
README.md
11
README.md
@@ -115,6 +115,7 @@ Typically finetunes of the base models below are supported as well.
|
||||
- [x] [CodeShell](https://github.com/WisdomShell/codeshell)
|
||||
- [x] [Gemma](https://ai.google.dev/gemma)
|
||||
- [x] [Mamba](https://github.com/state-spaces/mamba)
|
||||
- [x] [Xverse](https://huggingface.co/models?search=xverse)
|
||||
- [x] [Command-R](https://huggingface.co/CohereForAI/c4ai-command-r-v01)
|
||||
|
||||
**Multimodal models:**
|
||||
@@ -175,6 +176,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
|
||||
- [Mobile-Artificial-Intelligence/maid](https://github.com/Mobile-Artificial-Intelligence/maid) (MIT)
|
||||
- [Msty](https://msty.app) (proprietary)
|
||||
- [LLMFarm](https://github.com/guinmoon/LLMFarm?tab=readme-ov-file) (MIT)
|
||||
- [KanTV](https://github.com/zhouwg/kantv?tab=readme-ov-file)(Apachev2.0 or later)
|
||||
|
||||
*(to have a project listed here, it should clearly state that it depends on `llama.cpp`)*
|
||||
|
||||
@@ -634,15 +636,6 @@ Building the program with BLAS support may lead to some performance improvements
|
||||
|
||||
- #### Vulkan
|
||||
|
||||
> [!WARNING]
|
||||
>
|
||||
> Vulkan support has been broken in https://github.com/ggerganov/llama.cpp/pull/6122
|
||||
> due to relying on `GGML_OP_GET_ROWS` which is not yet properly supported by the Vulkan backend,
|
||||
> but should be fixed relatively soon (possibly in https://github.com/ggerganov/llama.cpp/pull/6155
|
||||
> (ref: https://github.com/ggerganov/llama.cpp/pull/6122#issuecomment-2015327635)).
|
||||
>
|
||||
> Meanwhile, if you want to use the Vulkan backend, you should use the commit right before the breaking change, https://github.com/ggerganov/llama.cpp/commit/55c1b2a3bbd470e9e2a3a0618b92cf64a885f806
|
||||
|
||||
**With docker**:
|
||||
|
||||
You don't need to install Vulkan SDK. It will be installed inside the container.
|
||||
|
||||
@@ -575,7 +575,7 @@ function gg_run_embd_bge_small {
|
||||
cd ${SRC}
|
||||
|
||||
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/config.json
|
||||
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/tokenizer.model
|
||||
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer.json
|
||||
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer_config.json
|
||||
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/special_tokens_map.json
|
||||
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/pytorch_model.bin
|
||||
|
||||
@@ -773,6 +773,148 @@ class BaichuanModel(Model):
|
||||
return weights[r * n_part:r * n_part + r, ...]
|
||||
|
||||
|
||||
@Model.register("XverseForCausalLM")
|
||||
class XverseModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.XVERSE
|
||||
|
||||
def set_vocab(self):
|
||||
assert (self.dir_model / "tokenizer.json").is_file()
|
||||
dir_model = self.dir_model
|
||||
hparams = self.hparams
|
||||
|
||||
tokens: list[bytearray] = []
|
||||
toktypes: list[int] = []
|
||||
|
||||
from transformers import AutoTokenizer
|
||||
tokenizer = AutoTokenizer.from_pretrained(dir_model)
|
||||
vocab_size = hparams.get("vocab_size", len(tokenizer.vocab))
|
||||
assert max(tokenizer.vocab.values()) < vocab_size
|
||||
|
||||
reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
|
||||
added_vocab = tokenizer.get_added_vocab()
|
||||
|
||||
for token_id in range(vocab_size):
|
||||
token_text = reverse_vocab[token_id].encode('utf-8')
|
||||
# replace "\x00" to string with length > 0
|
||||
if token_text == b"\x00":
|
||||
toktype = gguf.TokenType.BYTE # special
|
||||
token_text = f"<{token_text}>".encode('utf-8')
|
||||
elif re.fullmatch(br"<0x[0-9A-Fa-f]{2}>", token_text):
|
||||
toktype = gguf.TokenType.BYTE # special
|
||||
elif reverse_vocab[token_id] in added_vocab:
|
||||
if tokenizer.added_tokens_decoder[token_id].special:
|
||||
toktype = gguf.TokenType.CONTROL
|
||||
else:
|
||||
toktype = gguf.TokenType.USER_DEFINED
|
||||
else:
|
||||
toktype = gguf.TokenType.NORMAL
|
||||
|
||||
tokens.append(token_text)
|
||||
toktypes.append(toktype)
|
||||
|
||||
self.gguf_writer.add_tokenizer_model("llama")
|
||||
self.gguf_writer.add_token_list(tokens)
|
||||
self.gguf_writer.add_token_types(toktypes)
|
||||
|
||||
special_vocab = gguf.SpecialVocab(dir_model, n_vocab=len(tokens))
|
||||
special_vocab.add_to_gguf(self.gguf_writer)
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
block_count = self.hparams["num_hidden_layers"]
|
||||
head_count = self.hparams["num_attention_heads"]
|
||||
head_count_kv = self.hparams.get("num_key_value_heads", head_count)
|
||||
hf_repo = self.hparams.get("_name_or_path", "")
|
||||
|
||||
ctx_length = 0
|
||||
if "max_sequence_length" in self.hparams:
|
||||
ctx_length = self.hparams["max_sequence_length"]
|
||||
elif "max_position_embeddings" in self.hparams:
|
||||
ctx_length = self.hparams["max_position_embeddings"]
|
||||
elif "model_max_length" in self.hparams:
|
||||
ctx_length = self.hparams["model_max_length"]
|
||||
else:
|
||||
print("gguf: can not find ctx length parameter.")
|
||||
sys.exit()
|
||||
|
||||
self.gguf_writer.add_name(self.dir_model.name)
|
||||
self.gguf_writer.add_source_hf_repo(hf_repo)
|
||||
self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
|
||||
self.gguf_writer.add_context_length(ctx_length)
|
||||
self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
|
||||
self.gguf_writer.add_block_count(block_count)
|
||||
self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
|
||||
self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
|
||||
self.gguf_writer.add_head_count(head_count)
|
||||
self.gguf_writer.add_head_count_kv(head_count_kv)
|
||||
self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
|
||||
|
||||
if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
|
||||
if self.hparams["rope_scaling"].get("type") == "linear":
|
||||
self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
|
||||
self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
|
||||
|
||||
def write_tensors(self):
|
||||
# Collect tensors from generator object
|
||||
model_kv = dict(self.get_tensors())
|
||||
block_count = self.hparams["num_hidden_layers"]
|
||||
head_count = self.hparams["num_attention_heads"]
|
||||
tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
|
||||
head_count_kv = self.hparams.get("num_key_value_heads", head_count)
|
||||
|
||||
for name, data_torch in model_kv.items():
|
||||
# we don't need these
|
||||
if name.endswith(".rotary_emb.inv_freq"):
|
||||
continue
|
||||
|
||||
old_dtype = data_torch.dtype
|
||||
|
||||
# convert any unsupported data types to float32
|
||||
if data_torch.dtype not in (torch.float16, torch.float32):
|
||||
data_torch = data_torch.to(torch.float32)
|
||||
|
||||
# HF models permute some of the tensors, so we need to undo that
|
||||
if name.endswith(("q_proj.weight")):
|
||||
data_torch = self._reverse_hf_permute(data_torch, head_count, head_count)
|
||||
if name.endswith(("k_proj.weight")):
|
||||
data_torch = self._reverse_hf_permute(data_torch, head_count, head_count_kv)
|
||||
|
||||
data = data_torch.squeeze().numpy()
|
||||
|
||||
# map tensor names
|
||||
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
|
||||
if new_name is None:
|
||||
print(f"Can not map tensor {name!r}")
|
||||
sys.exit()
|
||||
|
||||
n_dims = len(data.shape)
|
||||
data_dtype = data.dtype
|
||||
|
||||
# if f32 desired, convert any float16 to float32
|
||||
if self.ftype == 0 and data_dtype == np.float16:
|
||||
data = data.astype(np.float32)
|
||||
|
||||
# TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
|
||||
if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
|
||||
data = data.astype(np.float32)
|
||||
|
||||
# if f16 desired, convert any float32 2-dim weight tensors to float16
|
||||
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
|
||||
data = data.astype(np.float16)
|
||||
|
||||
print(f"{name} -> {new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
|
||||
self.gguf_writer.add_tensor(new_name, data)
|
||||
|
||||
def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
|
||||
if n_kv_head is not None and n_head != n_kv_head:
|
||||
n_head //= n_kv_head
|
||||
|
||||
return (
|
||||
weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
|
||||
.swapaxes(1, 2)
|
||||
.reshape(weights.shape)
|
||||
)
|
||||
|
||||
|
||||
@Model.register("FalconForCausalLM", "RWForCausalLM")
|
||||
class FalconModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.FALCON
|
||||
|
||||
@@ -28,9 +28,11 @@ enum split_operation : uint8_t {
|
||||
|
||||
struct split_params {
|
||||
split_operation operation = SPLIT_OP_SPLIT;
|
||||
size_t n_bytes_split = 0;
|
||||
int n_split_tensors = 128;
|
||||
std::string input;
|
||||
std::string output;
|
||||
bool dry_run = false;
|
||||
};
|
||||
|
||||
static void split_print_usage(const char * executable) {
|
||||
@@ -41,15 +43,36 @@ static void split_print_usage(const char * executable) {
|
||||
printf("Apply a GGUF operation on IN to OUT.");
|
||||
printf("\n");
|
||||
printf("options:\n");
|
||||
printf(" -h, --help show this help message and exit\n");
|
||||
printf(" --version show version and build info\n");
|
||||
printf(" --split split GGUF to multiple GGUF (default)\n");
|
||||
printf(" --split-max-tensors max tensors in each split: default(%d)\n", default_params.n_split_tensors);
|
||||
printf(" --merge merge multiple GGUF to a single GGUF\n");
|
||||
printf(" -h, --help show this help message and exit\n");
|
||||
printf(" --version show version and build info\n");
|
||||
printf(" --split split GGUF to multiple GGUF (enabled by default)\n");
|
||||
printf(" --merge merge multiple GGUF to a single GGUF\n");
|
||||
printf(" --split-max-tensors max tensors in each split (default: %d)\n", default_params.n_split_tensors);
|
||||
printf(" --split-max-size N(M|G) max size per split\n");
|
||||
printf(" --dry-run only print out a split plan and exit, without writing any new files\n");
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static bool split_params_parse_ex(int argc, const char ** argv, split_params & params) {
|
||||
// return convert string, for example "128M" or "4G" to number of bytes
|
||||
static size_t split_str_to_n_bytes(std::string str) {
|
||||
size_t n_bytes = 0;
|
||||
int n;
|
||||
if (str.back() == 'M') {
|
||||
sscanf(str.c_str(), "%d", &n);
|
||||
n_bytes = n * 1024 * 1024; // megabytes
|
||||
} else if (str.back() == 'G') {
|
||||
sscanf(str.c_str(), "%d", &n);
|
||||
n_bytes = n * 1024 * 1024 * 1024; // gigabytes
|
||||
} else {
|
||||
throw std::invalid_argument("error: supported units are M (megabytes) or G (gigabytes), but got: " + std::string(1, str.back()));
|
||||
}
|
||||
if (n <= 0) {
|
||||
throw std::invalid_argument("error: size must be a positive value");
|
||||
}
|
||||
return n_bytes;
|
||||
}
|
||||
|
||||
static void split_params_parse_ex(int argc, const char ** argv, split_params & params) {
|
||||
std::string arg;
|
||||
const std::string arg_prefix = "--";
|
||||
bool invalid_param = false;
|
||||
@@ -62,6 +85,8 @@ static bool split_params_parse_ex(int argc, const char ** argv, split_params & p
|
||||
}
|
||||
|
||||
bool arg_found = false;
|
||||
bool is_op_set = false;
|
||||
bool is_mode_set = false;
|
||||
if (arg == "-h" || arg == "--help") {
|
||||
split_print_usage(argv[0]);
|
||||
exit(0);
|
||||
@@ -71,23 +96,46 @@ static bool split_params_parse_ex(int argc, const char ** argv, split_params & p
|
||||
fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
|
||||
exit(0);
|
||||
}
|
||||
if (arg == "--dry-run") {
|
||||
arg_found = true;
|
||||
params.dry_run = true;
|
||||
}
|
||||
|
||||
if (is_op_set) {
|
||||
throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
|
||||
}
|
||||
if (arg == "--merge") {
|
||||
arg_found = true;
|
||||
is_op_set = true;
|
||||
params.operation = SPLIT_OP_MERGE;
|
||||
}
|
||||
if (arg == "--split") {
|
||||
arg_found = true;
|
||||
is_op_set = true;
|
||||
params.operation = SPLIT_OP_SPLIT;
|
||||
}
|
||||
|
||||
if (is_mode_set) {
|
||||
throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both");
|
||||
}
|
||||
if (arg == "--split-max-tensors") {
|
||||
if (++arg_idx >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
arg_found = true;
|
||||
is_mode_set = true;
|
||||
params.n_split_tensors = atoi(argv[arg_idx]);
|
||||
}
|
||||
if (arg == "--split-max-size") {
|
||||
if (++arg_idx >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
arg_found = true;
|
||||
is_mode_set = true;
|
||||
params.n_bytes_split = split_str_to_n_bytes(argv[arg_idx]);
|
||||
}
|
||||
|
||||
if (!arg_found) {
|
||||
throw std::invalid_argument("error: unknown argument: " + arg);
|
||||
@@ -99,24 +147,17 @@ static bool split_params_parse_ex(int argc, const char ** argv, split_params & p
|
||||
}
|
||||
|
||||
if (argc - arg_idx < 2) {
|
||||
printf("%s: bad arguments\n", argv[0]);
|
||||
split_print_usage(argv[0]);
|
||||
return false;
|
||||
throw std::invalid_argument("error: bad arguments");
|
||||
}
|
||||
|
||||
params.input = argv[arg_idx++];
|
||||
params.output = argv[arg_idx++];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool split_params_parse(int argc, const char ** argv, split_params & params) {
|
||||
bool result = true;
|
||||
try {
|
||||
if (!split_params_parse_ex(argc, argv, params)) {
|
||||
split_print_usage(argv[0]);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
split_params_parse_ex(argc, argv, params);
|
||||
}
|
||||
catch (const std::invalid_argument & ex) {
|
||||
fprintf(stderr, "%s\n", ex.what());
|
||||
@@ -140,15 +181,11 @@ struct split_strategy {
|
||||
struct ggml_context * ctx_meta = NULL;
|
||||
const int n_tensors;
|
||||
|
||||
const int n_split;
|
||||
int i_split = 0;
|
||||
// one ctx_out per one output file
|
||||
std::vector<struct gguf_context *> ctx_outs;
|
||||
|
||||
int i_tensor = 0;
|
||||
|
||||
std::vector<uint8_t> read_data;
|
||||
|
||||
struct gguf_context * ctx_out;
|
||||
std::ofstream fout;
|
||||
// temporary buffer for reading in tensor data
|
||||
std::vector<uint8_t> read_buf;
|
||||
|
||||
split_strategy(const split_params & params,
|
||||
std::ifstream & f_input,
|
||||
@@ -158,79 +195,141 @@ struct split_strategy {
|
||||
f_input(f_input),
|
||||
ctx_gguf(ctx_gguf),
|
||||
ctx_meta(ctx_meta),
|
||||
n_tensors(gguf_get_n_tensors(ctx_gguf)),
|
||||
n_split(std::ceil(1. * n_tensors / params.n_split_tensors)) {
|
||||
n_tensors(gguf_get_n_tensors(ctx_gguf)) {
|
||||
|
||||
// because we need to know list of tensors for each file in advance, we will build all the ctx_out for all output splits
|
||||
int i_split = -1;
|
||||
struct gguf_context * ctx_out = NULL;
|
||||
auto new_ctx_out = [&]() {
|
||||
i_split++;
|
||||
if (ctx_out != NULL) {
|
||||
if (gguf_get_n_tensors(ctx_out) == 0) {
|
||||
fprintf(stderr, "error: one of splits have 0 tensors. Maybe size or tensors limit is too small\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
ctx_outs.push_back(ctx_out);
|
||||
}
|
||||
ctx_out = gguf_init_empty();
|
||||
// Save all metadata in first split only
|
||||
if (i_split == 0) {
|
||||
gguf_set_kv(ctx_out, ctx_gguf);
|
||||
}
|
||||
gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_NO, i_split);
|
||||
gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_COUNT, 0); // placeholder
|
||||
gguf_set_val_i32(ctx_out, LLM_KV_SPLIT_TENSORS_COUNT, n_tensors);
|
||||
};
|
||||
|
||||
// initialize ctx_out for the first split
|
||||
new_ctx_out();
|
||||
|
||||
// process tensors one by one
|
||||
size_t curr_tensors_size = 0; // current size by counting only tensors size (without metadata)
|
||||
for (int i = 0; i < n_tensors; ++i) {
|
||||
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
|
||||
// calculate the "imaginary" size = the current size + next tensor size
|
||||
size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
|
||||
size_t next_tensors_size = curr_tensors_size + n_bytes;
|
||||
if (should_split(i, next_tensors_size)) {
|
||||
new_ctx_out();
|
||||
curr_tensors_size = n_bytes;
|
||||
} else {
|
||||
curr_tensors_size = next_tensors_size;
|
||||
}
|
||||
gguf_add_tensor(ctx_out, t);
|
||||
}
|
||||
|
||||
bool should_split() const {
|
||||
return i_tensor < n_tensors && i_tensor % params.n_split_tensors == 0;
|
||||
// push the last ctx_out
|
||||
ctx_outs.push_back(ctx_out);
|
||||
|
||||
// set the correct n_split for all ctx_out
|
||||
for (auto & ctx : ctx_outs) {
|
||||
gguf_set_val_u16(ctx, LLM_KV_SPLIT_COUNT, ctx_outs.size());
|
||||
}
|
||||
}
|
||||
|
||||
void split_start() {
|
||||
ctx_out = gguf_init_empty();
|
||||
|
||||
// Save all metadata in first split only
|
||||
if (i_split == 0) {
|
||||
gguf_set_kv(ctx_out, ctx_gguf);
|
||||
~split_strategy() {
|
||||
for (auto & ctx_out : ctx_outs) {
|
||||
gguf_free(ctx_out);
|
||||
}
|
||||
gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_NO, i_split);
|
||||
gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_COUNT, n_split);
|
||||
gguf_set_val_i32(ctx_out, LLM_KV_SPLIT_TENSORS_COUNT, n_tensors);
|
||||
|
||||
// populate the original tensors, so we get an initial metadata
|
||||
for (int i = i_split * params.n_split_tensors; i < n_tensors && i < (i_split + 1) * params.n_split_tensors; ++i) {
|
||||
struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
|
||||
gguf_add_tensor(ctx_out, meta);
|
||||
}
|
||||
|
||||
char split_path[PATH_MAX] = {0};
|
||||
llama_split_path(split_path, sizeof(split_path), params.output.c_str(), i_split, n_split);
|
||||
|
||||
fprintf(stderr, "%s: %s ...", __func__, split_path);
|
||||
fout = std::ofstream(split_path, std::ios::binary);
|
||||
fout.exceptions(std::ofstream::failbit); // fail fast on write errors
|
||||
|
||||
auto meta_size = gguf_get_meta_size(ctx_out);
|
||||
|
||||
// placeholder for the meta data
|
||||
::zeros(fout, meta_size);
|
||||
|
||||
i_split++;
|
||||
}
|
||||
|
||||
void next_tensor() {
|
||||
const char * t_name = gguf_get_tensor_name(ctx_gguf, i_tensor);
|
||||
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
|
||||
auto n_bytes = ggml_nbytes(t);
|
||||
|
||||
if (read_data.size() < n_bytes) {
|
||||
read_data.resize(n_bytes);
|
||||
bool should_split(int i_tensor, size_t next_size) {
|
||||
if (params.n_bytes_split > 0) {
|
||||
// split by max size per file
|
||||
return next_size > params.n_bytes_split;
|
||||
} else {
|
||||
// split by number of tensors per file
|
||||
return i_tensor > 0 && i_tensor < n_tensors && i_tensor % params.n_split_tensors == 0;
|
||||
}
|
||||
|
||||
auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor);
|
||||
f_input.seekg(offset);
|
||||
f_input.read((char *)read_data.data(), n_bytes);
|
||||
|
||||
t->data = read_data.data();
|
||||
|
||||
// write tensor data + padding
|
||||
fout.write((const char *)t->data, n_bytes);
|
||||
zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
|
||||
|
||||
i_tensor++;
|
||||
}
|
||||
|
||||
void split_end() {
|
||||
// go back to beginning of file and write the updated metadata
|
||||
fout.seekp(0);
|
||||
std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
|
||||
gguf_get_meta_data(ctx_out, data.data());
|
||||
fout.write((const char *)data.data(), data.size());
|
||||
void print_info() {
|
||||
printf("n_split: %ld\n", ctx_outs.size());
|
||||
int i_split = 0;
|
||||
for (auto & ctx_out : ctx_outs) {
|
||||
// re-calculate the real gguf size for each split (= metadata size + total size of all tensors)
|
||||
size_t total_size = gguf_get_meta_size(ctx_out);
|
||||
for (int i = 0; i < gguf_get_n_tensors(ctx_out); ++i) {
|
||||
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_out, i));
|
||||
total_size += ggml_nbytes(t);
|
||||
}
|
||||
total_size = total_size / 1024 / 1024; // convert to megabytes
|
||||
printf("split %05d: n_tensors = %d, total_size = %ldM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
|
||||
i_split++;
|
||||
}
|
||||
}
|
||||
|
||||
fout.close();
|
||||
gguf_free(ctx_out);
|
||||
void write() {
|
||||
int i_split = 0;
|
||||
int n_split = ctx_outs.size();
|
||||
for (auto & ctx_out : ctx_outs) {
|
||||
// construct file path
|
||||
char split_path[PATH_MAX] = {0};
|
||||
llama_split_path(split_path, sizeof(split_path), params.output.c_str(), i_split, n_split);
|
||||
|
||||
fprintf(stderr, "\033[3Ddone\n");
|
||||
// open the output file
|
||||
printf("Writing file %s ... ", split_path);
|
||||
fflush(stdout);
|
||||
std::ofstream fout = std::ofstream(split_path, std::ios::binary);
|
||||
fout.exceptions(std::ofstream::failbit); // fail fast on write errors
|
||||
|
||||
// write metadata
|
||||
std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
|
||||
gguf_get_meta_data(ctx_out, data.data());
|
||||
fout.write((const char *)data.data(), data.size());
|
||||
|
||||
// write tensors
|
||||
for (int i = 0; i < gguf_get_n_tensors(ctx_out); ++i) {
|
||||
// read tensor meta and prepare buffer
|
||||
const char * t_name = gguf_get_tensor_name(ctx_out, i);
|
||||
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
|
||||
auto n_bytes = ggml_nbytes(t);
|
||||
read_buf.resize(n_bytes);
|
||||
|
||||
// calculate offset
|
||||
auto i_tensor_in = gguf_find_tensor(ctx_gguf, t_name); // idx of tensor in the input file
|
||||
auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor_in);
|
||||
|
||||
// copy tensor from input to output file
|
||||
copy_file_to_file(f_input, fout, offset, n_bytes);
|
||||
zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
|
||||
}
|
||||
|
||||
printf("done\n");
|
||||
// close the file
|
||||
fout.close();
|
||||
i_split++;
|
||||
}
|
||||
}
|
||||
|
||||
void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len) {
|
||||
// TODO: detect OS and use copy_file_range() here for better performance
|
||||
if (read_buf.size() < len) {
|
||||
read_buf.resize(len);
|
||||
}
|
||||
f_in.seekg(in_offset);
|
||||
f_in.read((char *)read_buf.data(), len);
|
||||
f_out.write((const char *)read_buf.data(), len);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -254,32 +353,22 @@ static void gguf_split(const split_params & split_params) {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
// prepare the strategy
|
||||
split_strategy strategy(split_params, f_input, ctx_gguf, ctx_meta);
|
||||
int n_split = strategy.ctx_outs.size();
|
||||
strategy.print_info();
|
||||
|
||||
char first_split_path[PATH_MAX] = {0};
|
||||
llama_split_path(first_split_path, sizeof(first_split_path),
|
||||
split_params.output.c_str(), strategy.i_split, strategy.n_split);
|
||||
fprintf(stderr, "%s: %s -> %s (%d tensors per file)\n",
|
||||
__func__, split_params.input.c_str(),
|
||||
first_split_path,
|
||||
split_params.n_split_tensors);
|
||||
|
||||
strategy.split_start();
|
||||
|
||||
while (strategy.i_tensor < strategy.n_tensors) {
|
||||
strategy.next_tensor();
|
||||
if (strategy.should_split()) {
|
||||
strategy.split_end();
|
||||
strategy.split_start();
|
||||
}
|
||||
if (!split_params.dry_run) {
|
||||
// write all output splits
|
||||
strategy.write();
|
||||
}
|
||||
strategy.split_end();
|
||||
|
||||
// done, clean up
|
||||
gguf_free(ctx_gguf);
|
||||
f_input.close();
|
||||
|
||||
fprintf(stderr, "%s: %d gguf split written with a total of %d tensors.\n",
|
||||
__func__, strategy.n_split, strategy.n_tensors);
|
||||
__func__, n_split, strategy.n_tensors);
|
||||
}
|
||||
|
||||
static void gguf_merge(const split_params & split_params) {
|
||||
@@ -448,10 +537,6 @@ static void gguf_merge(const split_params & split_params) {
|
||||
}
|
||||
|
||||
int main(int argc, const char ** argv) {
|
||||
if (argc < 3) {
|
||||
split_print_usage(argv[0]);
|
||||
}
|
||||
|
||||
split_params params;
|
||||
split_params_parse(argc, argv, params);
|
||||
|
||||
|
||||
@@ -705,8 +705,13 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
|
||||
struct ggml_tensor * leaf = graph->leafs[i];
|
||||
struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
|
||||
galloc->leaf_allocs[i].buffer_id = hn->buffer_id;
|
||||
galloc->leaf_allocs[i].leaf.offset = hn->offset;
|
||||
galloc->leaf_allocs[i].leaf.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf);
|
||||
if (leaf->view_src || leaf->data) {
|
||||
galloc->leaf_allocs[i].leaf.offset = SIZE_MAX;
|
||||
galloc->leaf_allocs[i].leaf.size_max = 0;
|
||||
} else {
|
||||
galloc->leaf_allocs[i].leaf.offset = hn->offset;
|
||||
galloc->leaf_allocs[i].leaf.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf);
|
||||
}
|
||||
}
|
||||
|
||||
// reallocate buffers if needed
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include "../ggml.h"
|
||||
#include "../ggml-cuda.h"
|
||||
#include "ggml.h"
|
||||
#include "ggml-cuda.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
#if defined(GGML_USE_HIPBLAS)
|
||||
@@ -11,7 +12,7 @@
|
||||
#define GGML_COMMON_DECL_CUDA
|
||||
#define GGML_COMMON_IMPL_CUDA
|
||||
#endif
|
||||
#include "../ggml-common.h"
|
||||
#include "ggml-common.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <array>
|
||||
|
||||
@@ -2,14 +2,6 @@
|
||||
#include "dequantize.cuh"
|
||||
#include "convert.cuh"
|
||||
|
||||
// dmmv = dequantize_mul_mat_vec
|
||||
#ifndef GGML_CUDA_DMMV_X
|
||||
#define GGML_CUDA_DMMV_X 32
|
||||
#endif
|
||||
#ifndef GGML_CUDA_MMV_Y
|
||||
#define GGML_CUDA_MMV_Y 1
|
||||
#endif
|
||||
|
||||
#ifndef K_QUANTS_PER_ITERATION
|
||||
#define K_QUANTS_PER_ITERATION 2
|
||||
#else
|
||||
|
||||
@@ -1,5 +1,16 @@
|
||||
#include "common.cuh"
|
||||
|
||||
// dmmv = dequantize_mul_mat_vec
|
||||
|
||||
// TODO: remove this?
|
||||
#ifndef GGML_CUDA_DMMV_X
|
||||
#define GGML_CUDA_DMMV_X 32
|
||||
#endif
|
||||
|
||||
#ifndef GGML_CUDA_MMV_Y
|
||||
#define GGML_CUDA_MMV_Y 1
|
||||
#endif
|
||||
|
||||
void ggml_cuda_op_dequantize_mul_mat_vec(
|
||||
ggml_backend_cuda_context & ctx,
|
||||
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
|
||||
|
||||
27814
ggml-vulkan-shaders.hpp
27814
ggml-vulkan-shaders.hpp
File diff suppressed because it is too large
Load Diff
633
ggml-vulkan.cpp
633
ggml-vulkan.cpp
File diff suppressed because it is too large
Load Diff
@@ -11,17 +11,6 @@ extern "C" {
|
||||
#define GGML_VK_MAX_DEVICES 16
|
||||
|
||||
GGML_API void ggml_vk_instance_init(void);
|
||||
GGML_API void ggml_vk_init_cpu_assist(void);
|
||||
|
||||
GGML_API void ggml_vk_preallocate_buffers_graph_cpu_assist(struct ggml_tensor * node);
|
||||
GGML_API void ggml_vk_preallocate_buffers_cpu_assist(void);
|
||||
GGML_API void ggml_vk_build_graph_cpu_assist(struct ggml_tensor * node, bool last_node);
|
||||
GGML_API bool ggml_vk_compute_forward_cpu_assist(struct ggml_compute_params * params, struct ggml_tensor * tensor);
|
||||
#ifdef GGML_VULKAN_CHECK_RESULTS
|
||||
void ggml_vk_check_results_1_cpu_assist(struct ggml_compute_params * params, struct ggml_tensor * tensor);
|
||||
#endif
|
||||
GGML_API void ggml_vk_graph_cleanup_cpu_assist(void);
|
||||
GGML_API void ggml_vk_free_cpu_assist(void);
|
||||
|
||||
// backend API
|
||||
GGML_API GGML_CALL ggml_backend_t ggml_backend_vk_init(size_t dev_num);
|
||||
|
||||
35
ggml.c
35
ggml.c
@@ -278,8 +278,6 @@ inline static void * ggml_calloc(size_t num, size_t size) {
|
||||
#include <Accelerate/Accelerate.h>
|
||||
#if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
|
||||
#include "ggml-opencl.h"
|
||||
#elif defined(GGML_USE_VULKAN)
|
||||
#include "ggml-vulkan.h"
|
||||
#endif
|
||||
#elif defined(GGML_USE_OPENBLAS)
|
||||
#if defined(GGML_BLAS_USE_MKL)
|
||||
@@ -289,8 +287,6 @@ inline static void * ggml_calloc(size_t num, size_t size) {
|
||||
#endif
|
||||
#elif defined(GGML_USE_CLBLAST)
|
||||
#include "ggml-opencl.h"
|
||||
#elif defined(GGML_USE_VULKAN)
|
||||
#include "ggml-vulkan.h"
|
||||
#endif
|
||||
|
||||
// floating point type used to accumulate sums
|
||||
@@ -2717,8 +2713,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
||||
|
||||
#if defined(GGML_USE_CLBLAST)
|
||||
ggml_cl_init();
|
||||
#elif defined(GGML_USE_VULKAN)
|
||||
ggml_vk_init_cpu_assist();
|
||||
#endif
|
||||
|
||||
ggml_setup_op_has_task_pass();
|
||||
@@ -16128,20 +16122,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
||||
return;
|
||||
}
|
||||
|
||||
#if defined(GGML_USE_VULKAN)
|
||||
const bool skip_cpu = ggml_vk_compute_forward_cpu_assist(params, tensor);
|
||||
#ifdef GGML_VULKAN_CHECK_RESULTS
|
||||
if (skip_cpu) {
|
||||
ggml_vk_check_results_1_cpu_assist(params, tensor);
|
||||
}
|
||||
#endif
|
||||
if (skip_cpu) {
|
||||
return;
|
||||
}
|
||||
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
|
||||
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
|
||||
#endif // GGML_USE_VULKAN
|
||||
|
||||
switch (tensor->op) {
|
||||
case GGML_OP_DUP:
|
||||
{
|
||||
@@ -18617,17 +18597,6 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef GGML_USE_VULKAN
|
||||
for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||
ggml_vk_preallocate_buffers_graph_cpu_assist(cgraph->nodes[i]);
|
||||
}
|
||||
ggml_vk_preallocate_buffers_cpu_assist();
|
||||
|
||||
for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||
ggml_vk_build_graph_cpu_assist(cgraph->nodes[i], i == cgraph->n_nodes - 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
const int n_threads = cplan->n_threads;
|
||||
|
||||
struct ggml_compute_state_shared state_shared = {
|
||||
@@ -18684,10 +18653,6 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef GGML_USE_VULKAN
|
||||
ggml_vk_graph_cleanup_cpu_assist();
|
||||
#endif
|
||||
|
||||
// performance stats (graph)
|
||||
{
|
||||
int64_t perf_cycles_cur = ggml_perf_cycles() - perf_start_cycles;
|
||||
|
||||
@@ -18,6 +18,12 @@ shader_int8_ext = """
|
||||
"""
|
||||
|
||||
# Type-specific defines
|
||||
shader_f32_defines = """
|
||||
#define QUANT_K 1
|
||||
#define QUANT_R 1
|
||||
|
||||
#define A_TYPE float
|
||||
"""
|
||||
shader_f16_defines = """
|
||||
#define QUANT_K 1
|
||||
#define QUANT_R 1
|
||||
@@ -157,8 +163,8 @@ struct block_q6_K
|
||||
"""
|
||||
|
||||
# Dequant functions
|
||||
shader_f16_dequant_func = """
|
||||
#define DEQUANT_FUNC vec2 v = vec2(data_a[ib + 0], data_a[ib + 1]);
|
||||
shader_float_dequant_func = """
|
||||
#define DEQUANT_FUNC vec2 v = vec2(ib, ib); // data_a[ib], data_a[ib + 1]);
|
||||
"""
|
||||
|
||||
shader_q4_0_dequant_func = """
|
||||
@@ -410,6 +416,133 @@ mulmat_load_q8_0 = """
|
||||
buf_a[buf_idx ] = FLOAT_TYPE(v.x);
|
||||
buf_a[buf_idx + 1] = FLOAT_TYPE(v.y);"""
|
||||
|
||||
|
||||
mulmat_load_q2_K = """
|
||||
const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a;
|
||||
const uint buf_idx = (loadc_a + l) * (BK+1) + loadr_a * LOAD_VEC_A;
|
||||
|
||||
const uint ib = idx / 128; // 2 values per idx
|
||||
const uint iqs = idx % 128; // 0..127
|
||||
|
||||
const uint qsi = (iqs / 64) * 32 + (iqs % 16) * 2; // 0,2,4..30
|
||||
const uint scalesi = iqs / 8; // 0..15
|
||||
const uint qsshift = ((iqs % 64) / 16) * 2; // 0,2,4,6
|
||||
|
||||
const uvec2 qs = uvec2(data_a[ib].qs[qsi], data_a[ib].qs[qsi + 1]);
|
||||
const uint scales = data_a[ib].scales[scalesi];
|
||||
const vec2 d = vec2(data_a[ib].d);
|
||||
|
||||
const vec2 v = d.x * float(scales & 0xF) * vec2((qs >> qsshift) & 3) - d.y * float(scales >> 4);
|
||||
|
||||
buf_a[buf_idx ] = FLOAT_TYPE(v.x);
|
||||
buf_a[buf_idx + 1] = FLOAT_TYPE(v.y);"""
|
||||
|
||||
mulmat_load_q3_K = """
|
||||
const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a;
|
||||
const uint buf_idx = (loadc_a + l) * (BK+1) + loadr_a * LOAD_VEC_A;
|
||||
|
||||
const uint ib = idx / 128; // 2 values per idx
|
||||
const uint iqs = idx % 128; // 0..127
|
||||
|
||||
const uint n = iqs / 64; // 0,1
|
||||
const uint qsi = n * 32 + (iqs % 16) * 2; // 0,2,4..62
|
||||
const uint hmi = (iqs % 16) * 2; // 0,2,4..30
|
||||
const uint j = (iqs % 64) / 4; // 0..3
|
||||
const uint is = iqs / 8; // 0..15
|
||||
const uint halfsplit = ((iqs % 64) / 16); // 0,1,2,3
|
||||
const uint qsshift = halfsplit * 2; // 0,2,4,6
|
||||
const uint m = 1 << (4 * n + halfsplit); // 1,2,4,8,16,32,64,128
|
||||
|
||||
const int8_t us = int8_t(is < 4 ? (data_a[ib].scales[is-0] & 0xF) | (((data_a[ib].scales[is+8] >> 0) & 3) << 4) :
|
||||
is < 8 ? (data_a[ib].scales[is-0] & 0xF) | (((data_a[ib].scales[is+4] >> 2) & 3) << 4) :
|
||||
is < 12 ? (data_a[ib].scales[is-8] >> 4) | (((data_a[ib].scales[is+0] >> 4) & 3) << 4) :
|
||||
(data_a[ib].scales[is-8] >> 4) | (((data_a[ib].scales[is-4] >> 6) & 3) << 4));
|
||||
const float dl = float(data_a[ib].d) * float(us - 32);
|
||||
|
||||
buf_a[buf_idx ] = FLOAT_TYPE(dl * float(int8_t((data_a[ib].qs[qsi ] >> qsshift) & 3) - (((data_a[ib].hmask[hmi ] & m) != 0) ? 0 : 4)));
|
||||
buf_a[buf_idx + 1] = FLOAT_TYPE(dl * float(int8_t((data_a[ib].qs[qsi + 1] >> qsshift) & 3) - (((data_a[ib].hmask[hmi + 1] & m) != 0) ? 0 : 4)));"""
|
||||
|
||||
mulmat_load_q4_K = """
|
||||
const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a;
|
||||
const uint buf_idx = (loadc_a + l) * (BK+1) + loadr_a * LOAD_VEC_A;
|
||||
|
||||
const uint ib = idx / 128; // 2 values per idx
|
||||
const uint iqs = idx % 128; // 0..127
|
||||
|
||||
const uint n = iqs / 32; // 0,1,2,3
|
||||
const uint b = (iqs % 32) / 16; // 0,1
|
||||
const uint is = 2 * n + b; // 0..7
|
||||
const uint qsi = n * 32 + (iqs % 16) * 2; // 0,2,4..126
|
||||
|
||||
const vec2 loadd = vec2(data_a[ib].d);
|
||||
|
||||
uint8_t sc;
|
||||
uint8_t mbyte;
|
||||
if (is < 4) {
|
||||
sc = uint8_t(data_a[ib].scales[is ] & 63);
|
||||
mbyte = uint8_t(data_a[ib].scales[is + 4] & 63);
|
||||
} else {
|
||||
sc = uint8_t((data_a[ib].scales[is + 4] & 0xF) | ((data_a[ib].scales[is - 4] >> 6) << 4));
|
||||
mbyte = uint8_t((data_a[ib].scales[is + 4] >> 4) | ((data_a[ib].scales[is ] >> 6) << 4));
|
||||
}
|
||||
const float d = loadd.x * sc;
|
||||
const float m = loadd.y * mbyte;
|
||||
|
||||
buf_a[buf_idx ] = FLOAT_TYPE(d * float((data_a[ib].qs[qsi ] >> (b * 4)) & 0xF) - m);
|
||||
buf_a[buf_idx + 1] = FLOAT_TYPE(d * float((data_a[ib].qs[qsi + 1] >> (b * 4)) & 0xF) - m);"""
|
||||
|
||||
mulmat_load_q5_K = """
|
||||
const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a;
|
||||
const uint buf_idx = (loadc_a + l) * (BK+1) + loadr_a * LOAD_VEC_A;
|
||||
|
||||
const uint ib = idx / 128; // 2 values per idx
|
||||
const uint iqs = idx % 128; // 0..127
|
||||
|
||||
const uint n = iqs / 32; // 0,1,2,3
|
||||
const uint b = (iqs % 32) / 16; // 0,1
|
||||
const uint is = 2 * n + b; // 0..7
|
||||
const uint qsi = n * 32 + (iqs % 16) * 2; // 0,2,4..126
|
||||
const uint qhi = (iqs % 16) * 2; // 0,2,4..30
|
||||
|
||||
const uint8_t hm = uint8_t(1 << (iqs / 16));
|
||||
|
||||
const vec2 loadd = vec2(data_a[ib].d);
|
||||
|
||||
uint8_t sc;
|
||||
uint8_t mbyte;
|
||||
if (is < 4) {
|
||||
sc = uint8_t(data_a[ib].scales[is ] & 63);
|
||||
mbyte = uint8_t(data_a[ib].scales[is + 4] & 63);
|
||||
} else {
|
||||
sc = uint8_t((data_a[ib].scales[is + 4] & 0xF) | ((data_a[ib].scales[is - 4] >> 6) << 4));
|
||||
mbyte = uint8_t((data_a[ib].scales[is + 4] >> 4) | ((data_a[ib].scales[is ] >> 6) << 4));
|
||||
}
|
||||
const float d = loadd.x * sc;
|
||||
const float m = loadd.y * mbyte;
|
||||
|
||||
buf_a[buf_idx ] = FLOAT_TYPE(d * (float((data_a[ib].qs[qsi ] >> (b * 4)) & 0xF) + float((data_a[ib].qh[qhi ] & hm) != 0 ? 16 : 0)) - m);
|
||||
buf_a[buf_idx + 1] = FLOAT_TYPE(d * (float((data_a[ib].qs[qsi + 1] >> (b * 4)) & 0xF) + float((data_a[ib].qh[qhi + 1] & hm) != 0 ? 16 : 0)) - m);"""
|
||||
|
||||
mulmat_load_q6_K = """
|
||||
const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a;
|
||||
const uint buf_idx = (loadc_a + l) * (BK+1) + loadr_a * LOAD_VEC_A;
|
||||
|
||||
const uint ib = idx / 128; // 2 values per idx
|
||||
const uint iqs = idx % 128; // 0..127
|
||||
|
||||
const uint n = iqs / 64; // 0,1
|
||||
const uint b = (iqs % 64) / 32; // 0,1
|
||||
const uint is_b = (iqs % 16) / 8; // 0,1
|
||||
const uint qhshift = ((iqs % 64) / 16) * 2; // 0,2,4,6
|
||||
const uint is = 8 * n + qhshift + is_b; // 0..15
|
||||
const uint qsi = n * 64 + (iqs % 32) * 2; // 0,2,4..126
|
||||
const uint qhi = n * 32 + (iqs % 16) * 2; // 0,2,4..62
|
||||
|
||||
const float dscale = float(data_a[ib].d) * float(data_a[ib].scales[is]);
|
||||
|
||||
buf_a[buf_idx ] = FLOAT_TYPE(dscale * float(int8_t(((data_a[ib].ql[qsi ] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi ] >> qhshift) & 3) << 4)) - 32));
|
||||
buf_a[buf_idx + 1] = FLOAT_TYPE(dscale * float(int8_t(((data_a[ib].ql[qsi + 1] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi + 1] >> qhshift) & 3) << 4)) - 32));"""
|
||||
|
||||
mulmat_body2 = """
|
||||
}
|
||||
[[unroll]] for (uint l = 0; l < BN; l += loadstride_b) {
|
||||
@@ -1611,8 +1744,9 @@ layout (push_constant) uniform parameter
|
||||
uint ne10; uint ne11; uint ne12; uint ne13; uint nb10; uint nb11; uint nb12; uint nb13;
|
||||
uint d_offset;
|
||||
float param1; float param2;
|
||||
} p;
|
||||
} p;"""
|
||||
|
||||
generic_unary_op_funcs = """
|
||||
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout (binding = 0) readonly buffer A {A_TYPE data_a[];};
|
||||
@@ -1636,14 +1770,17 @@ uint dst_idx(uint idx) {
|
||||
const uint i11 = (idx - i13_offset - i12_offset) / p.ne10;
|
||||
const uint i10 = idx - i13_offset - i12_offset - i11*p.ne10;
|
||||
return i13*p.nb13 + i12*p.nb12 + i11*p.nb11 + i10*p.nb10;
|
||||
}
|
||||
}"""
|
||||
|
||||
generic_unary_op_main = """
|
||||
void main() {
|
||||
if (gl_GlobalInvocationID.x >= p.ne) {
|
||||
return;
|
||||
}
|
||||
"""
|
||||
|
||||
generic_unary_op_combined = f"{generic_unary_op_head}\n{generic_unary_op_funcs}\n{generic_unary_op_main}"
|
||||
|
||||
generic_binary_op_head = """#version 450
|
||||
|
||||
#extension GL_EXT_shader_16bit_storage : require
|
||||
@@ -1655,13 +1792,14 @@ layout (push_constant) uniform parameter
|
||||
uint ne10; uint ne11; uint ne12; uint ne13; uint nb10; uint nb11; uint nb12; uint nb13;
|
||||
uint ne20; uint ne21; uint ne22; uint ne23; uint nb20; uint nb21; uint nb22; uint nb23;
|
||||
uint d_offset;
|
||||
uint param1; uint param2;
|
||||
} p;
|
||||
float param1; float param2;
|
||||
} p;"""
|
||||
|
||||
generic_binary_op_funcs = """
|
||||
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout (binding = 0) readonly buffer A {A_TYPE data_a[];};
|
||||
layout (binding = 1) readonly buffer B {A_TYPE data_b[];};
|
||||
layout (binding = 1) readonly buffer B {B_TYPE data_b[];};
|
||||
layout (binding = 2) writeonly buffer D {D_TYPE data_d[];};
|
||||
|
||||
uint src0_idx(uint idx) {
|
||||
@@ -1693,14 +1831,17 @@ uint dst_idx(uint idx) {
|
||||
const uint i21 = (idx - i23_offset - i22_offset) / p.ne20;
|
||||
const uint i20 = idx - i23_offset - i22_offset - i21*p.ne20;
|
||||
return i23*p.nb23 + i22*p.nb22 + i21*p.nb21 + i20*p.nb20;
|
||||
}
|
||||
}"""
|
||||
|
||||
generic_binary_op_main = """
|
||||
void main() {
|
||||
if (gl_GlobalInvocationID.x >= p.ne) {
|
||||
return;
|
||||
}
|
||||
"""
|
||||
|
||||
generic_binary_op_combined = f"{generic_binary_op_head}\n{generic_binary_op_funcs}\n{generic_binary_op_main}"
|
||||
|
||||
# MUL F32
|
||||
mul_body = """
|
||||
data_d[p.d_offset + dst_idx(gl_GlobalInvocationID.x)] = D_TYPE(FLOAT_TYPE(data_a[src0_idx(gl_GlobalInvocationID.x)]) * FLOAT_TYPE(data_b[src1_idx(gl_GlobalInvocationID.x)]));
|
||||
@@ -1745,39 +1886,55 @@ cpy_f16_f16_end = """
|
||||
"""
|
||||
|
||||
# GET_ROWS
|
||||
get_rows_body = """
|
||||
#extension GL_EXT_control_flow_attributes : enable
|
||||
#extension GL_EXT_shader_8bit_storage : require
|
||||
|
||||
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout (binding = 0) readonly buffer X {A_TYPE data_a[];};
|
||||
layout (binding = 1) readonly buffer Y {int data_b[];};
|
||||
layout (binding = 2) writeonly buffer D {D_TYPE dst[];};
|
||||
|
||||
get_rows_float_body = """
|
||||
void main() {
|
||||
const uint col = int(gl_GlobalInvocationID.x) * 2;
|
||||
const uint row = int(gl_GlobalInvocationID.y);
|
||||
const uint i00 = gl_GlobalInvocationID.x;
|
||||
const uint i10 = gl_GlobalInvocationID.y;
|
||||
const uint i11 = (gl_GlobalInvocationID.z)/p.ne12;
|
||||
const uint i12 = (gl_GlobalInvocationID.z)%p.ne12;
|
||||
|
||||
if (col >= p.KY) {
|
||||
if (i00 >= p.ne00) {
|
||||
return;
|
||||
}
|
||||
|
||||
const uint r = uint(data_b[row]);
|
||||
const uint i01 = data_b[i10*p.nb10 + i11*p.nb11 + i12*p.nb12];
|
||||
|
||||
// copy data_a[r*p.KY + col] to dst[row*p.KX + col]
|
||||
const uint xi = r*p.KY + col;
|
||||
const uint di = row*p.KY + col;
|
||||
const uint a_offset = i01*p.nb01 + i11*p.nb02 + i12*p.nb03;
|
||||
const uint d_offset = i10*p.nb21 + i11*p.nb22 + i12*p.nb23;
|
||||
|
||||
const uint ib = xi/QUANT_K; // block index
|
||||
const uint iqs = (xi%QUANT_K)/QUANT_R; // quant index
|
||||
const uint iybs = di - di%QUANT_K; // y block start index
|
||||
#ifndef OPTIMIZATION_ERROR_WORKAROUND
|
||||
data_d[d_offset + i00] = D_TYPE(data_a[a_offset + i00]);
|
||||
#else
|
||||
data_d[d_offset + i00] = data_a[a_offset + i00];
|
||||
#endif
|
||||
}
|
||||
"""
|
||||
|
||||
get_rows_body = """
|
||||
void main() {
|
||||
const uint i00 = (gl_GlobalInvocationID.x)*2;
|
||||
const uint i10 = gl_GlobalInvocationID.y;
|
||||
const uint i11 = (gl_GlobalInvocationID.z)/p.ne12;
|
||||
const uint i12 = (gl_GlobalInvocationID.z)%p.ne12;
|
||||
|
||||
if (i00 >= p.ne00) {
|
||||
return;
|
||||
}
|
||||
|
||||
const uint i01 = data_b[i10*p.nb10 + i11*p.nb11 + i12*p.nb12];
|
||||
|
||||
const uint a_offset = i01*p.nb01 + i11*p.nb02 + i12*p.nb03;
|
||||
const uint d_offset = i10*p.nb21 + i11*p.nb22 + i12*p.nb23;
|
||||
|
||||
const uint ib = a_offset + i00/QUANT_K; // block index
|
||||
const uint iqs = (i00%QUANT_K)/QUANT_R; // quant index
|
||||
const uint iybs = i00 - i00%QUANT_K; // dst block start index
|
||||
const uint y_offset = QUANT_R == 1 ? 1 : QUANT_K/2;
|
||||
|
||||
DEQUANT_FUNC
|
||||
|
||||
dst[iybs + iqs + 0] = D_TYPE(v.x);
|
||||
dst[iybs + iqs + y_offset] = D_TYPE(v.y);
|
||||
data_d[d_offset + iybs + iqs ] = D_TYPE(v.x);
|
||||
data_d[d_offset + iybs + iqs + y_offset] = D_TYPE(v.y);
|
||||
}
|
||||
"""
|
||||
|
||||
@@ -2418,6 +2575,31 @@ async def main():
|
||||
tasks.append(string_to_spv("matmul_q8_0_f32", "".join(stream), {"LOAD_VEC_A": 2, "A_TYPE": "block_q8_0", "B_TYPE": "float", "D_TYPE": "float"}, fp16))
|
||||
tasks.append(string_to_spv("matmul_q8_0_f32_aligned", "".join(stream), {"LOAD_VEC_A": 2, "LOAD_VEC_B": load_vec, "A_TYPE": "block_q8_0", "B_TYPE": vec_type, "D_TYPE": "float"}, fp16))
|
||||
|
||||
stream.clear()
|
||||
stream.extend((mulmat_head, shader_int8_ext, shader_float_type, shader_q2_K_defines, mulmat_body1, mulmat_load_q2_K, mulmat_body2))
|
||||
tasks.append(string_to_spv("matmul_q2_k_f32", "".join(stream), {"LOAD_VEC_A": 2, "A_TYPE": "block_q2_K", "B_TYPE": "float", "D_TYPE": "float"}, fp16))
|
||||
tasks.append(string_to_spv("matmul_q2_k_f32_aligned", "".join(stream), {"LOAD_VEC_A": 2, "LOAD_VEC_B": load_vec, "A_TYPE": "block_q2_K", "B_TYPE": vec_type, "D_TYPE": "float"}, fp16))
|
||||
|
||||
stream.clear()
|
||||
stream.extend((mulmat_head, shader_int8_ext, shader_float_type, shader_q3_K_defines, mulmat_body1, mulmat_load_q3_K, mulmat_body2))
|
||||
tasks.append(string_to_spv("matmul_q3_k_f32", "".join(stream), {"LOAD_VEC_A": 2, "A_TYPE": "block_q3_K", "B_TYPE": "float", "D_TYPE": "float"}, fp16))
|
||||
tasks.append(string_to_spv("matmul_q3_k_f32_aligned", "".join(stream), {"LOAD_VEC_A": 2, "LOAD_VEC_B": load_vec, "A_TYPE": "block_q3_K", "B_TYPE": vec_type, "D_TYPE": "float"}, fp16))
|
||||
|
||||
stream.clear()
|
||||
stream.extend((mulmat_head, shader_int8_ext, shader_float_type, shader_q4_K_defines, mulmat_body1, mulmat_load_q4_K, mulmat_body2))
|
||||
tasks.append(string_to_spv("matmul_q4_k_f32", "".join(stream), {"LOAD_VEC_A": 2, "A_TYPE": "block_q4_K", "B_TYPE": "float", "D_TYPE": "float"}, fp16))
|
||||
tasks.append(string_to_spv("matmul_q4_k_f32_aligned", "".join(stream), {"LOAD_VEC_A": 2, "LOAD_VEC_B": load_vec, "A_TYPE": "block_q4_K", "B_TYPE": vec_type, "D_TYPE": "float"}, fp16))
|
||||
|
||||
stream.clear()
|
||||
stream.extend((mulmat_head, shader_int8_ext, shader_float_type, shader_q5_K_defines, mulmat_body1, mulmat_load_q5_K, mulmat_body2))
|
||||
tasks.append(string_to_spv("matmul_q5_k_f32", "".join(stream), {"LOAD_VEC_A": 2, "A_TYPE": "block_q5_K", "B_TYPE": "float", "D_TYPE": "float"}, fp16))
|
||||
tasks.append(string_to_spv("matmul_q5_k_f32_aligned", "".join(stream), {"LOAD_VEC_A": 2, "LOAD_VEC_B": load_vec, "A_TYPE": "block_q5_K", "B_TYPE": vec_type, "D_TYPE": "float"}, fp16))
|
||||
|
||||
stream.clear()
|
||||
stream.extend((mulmat_head, shader_int8_ext, shader_float_type, shader_q6_K_defines, mulmat_body1, mulmat_load_q6_K, mulmat_body2))
|
||||
tasks.append(string_to_spv("matmul_q6_k_f32", "".join(stream), {"LOAD_VEC_A": 2, "A_TYPE": "block_q6_K", "B_TYPE": "float", "D_TYPE": "float"}, fp16))
|
||||
tasks.append(string_to_spv("matmul_q6_k_f32_aligned", "".join(stream), {"LOAD_VEC_A": 2, "LOAD_VEC_B": load_vec, "A_TYPE": "block_q6_K", "B_TYPE": vec_type, "D_TYPE": "float"}, fp16))
|
||||
|
||||
# Shaders where precision is needed, so no fp16 version
|
||||
|
||||
# mul mat vec
|
||||
@@ -2426,7 +2608,7 @@ async def main():
|
||||
stream.extend((mul_mat_vec_head, shader_int8_ext, shader_f32))
|
||||
|
||||
if i == GGML_TYPE_F16:
|
||||
stream.extend((shader_f16_defines, shader_f16_dequant_func, mul_mat_vec_body))
|
||||
stream.extend((shader_f16_defines, shader_float_dequant_func, mul_mat_vec_body))
|
||||
elif i == GGML_TYPE_Q4_0:
|
||||
stream.extend((shader_q4_0_defines, shader_q4_0_dequant_func, mul_mat_vec_body))
|
||||
elif i == GGML_TYPE_Q4_1:
|
||||
@@ -2488,25 +2670,32 @@ async def main():
|
||||
# get_rows
|
||||
for i in range(0, VK_NUM_TYPES):
|
||||
stream.clear()
|
||||
stream.extend((generic_head, shader_int8_ext, shader_f32))
|
||||
stream.extend((generic_binary_op_head, shader_int8_ext, shader_f32))
|
||||
optimization_workaround = False
|
||||
|
||||
if i == GGML_TYPE_F16:
|
||||
stream.extend((shader_f16_defines, shader_f16_dequant_func, get_rows_body))
|
||||
if i == GGML_TYPE_F32:
|
||||
stream.extend((shader_f32_defines, generic_binary_op_funcs, get_rows_float_body))
|
||||
elif i == GGML_TYPE_F16:
|
||||
stream.extend((shader_f16_defines, generic_binary_op_funcs, get_rows_float_body))
|
||||
optimization_workaround = True
|
||||
elif i == GGML_TYPE_Q4_0:
|
||||
stream.extend((shader_q4_0_defines, shader_q4_0_dequant_func, get_rows_body))
|
||||
stream.extend((shader_q4_0_defines, shader_q4_0_dequant_func, generic_binary_op_funcs, get_rows_body))
|
||||
elif i == GGML_TYPE_Q4_1:
|
||||
stream.extend((shader_q4_1_defines, shader_q4_1_dequant_func, get_rows_body))
|
||||
stream.extend((shader_q4_1_defines, shader_q4_1_dequant_func, generic_binary_op_funcs, get_rows_body))
|
||||
elif i == GGML_TYPE_Q5_0:
|
||||
stream.extend((shader_q5_0_defines, shader_q5_0_dequant_func, get_rows_body))
|
||||
stream.extend((shader_q5_0_defines, shader_q5_0_dequant_func, generic_binary_op_funcs, get_rows_body))
|
||||
elif i == GGML_TYPE_Q5_1:
|
||||
stream.extend((shader_q5_1_defines, shader_q5_1_dequant_func, get_rows_body))
|
||||
stream.extend((shader_q5_1_defines, shader_q5_1_dequant_func, generic_binary_op_funcs, get_rows_body))
|
||||
elif i == GGML_TYPE_Q8_0:
|
||||
stream.extend((shader_q8_0_defines, shader_q8_0_dequant_func, get_rows_body))
|
||||
stream.extend((shader_q8_0_defines, shader_q8_0_dequant_func, generic_binary_op_funcs, get_rows_body))
|
||||
else:
|
||||
continue
|
||||
|
||||
tasks.append(string_to_spv(f"get_rows_{type_names[i]}", "".join(stream), {"B_TYPE": "float", "D_TYPE": "float16_t"}))
|
||||
tasks.append(string_to_spv(f"get_rows_{type_names[i]}_f32", "".join(stream), {"B_TYPE": "float", "D_TYPE": "float"}))
|
||||
if optimization_workaround:
|
||||
tasks.append(string_to_spv(f"get_rows_{type_names[i]}", "".join(stream), {"B_TYPE": "int", "D_TYPE": "float16_t", "OPTIMIZATION_ERROR_WORKAROUND": "1"}))
|
||||
else:
|
||||
tasks.append(string_to_spv(f"get_rows_{type_names[i]}", "".join(stream), {"B_TYPE": "int", "D_TYPE": "float16_t"}))
|
||||
tasks.append(string_to_spv(f"get_rows_{type_names[i]}_f32", "".join(stream), {"B_TYPE": "int", "D_TYPE": "float"}))
|
||||
|
||||
tasks.append(string_to_spv("mul_mat_vec_p021_f16_f32", mul_mat_p021_src, {"A_TYPE": "float16_t", "B_TYPE": "float", "D_TYPE": "float"}))
|
||||
tasks.append(string_to_spv("mul_mat_vec_nc_f16_f32", mul_mat_nc_src, {"A_TYPE": "float16_t", "B_TYPE": "float", "D_TYPE": "float"}))
|
||||
@@ -2515,20 +2704,20 @@ async def main():
|
||||
tasks.append(string_to_spv("norm_f32", f"{generic_head}\n{shader_f32}\n{norm_body}", {"A_TYPE": "float", "D_TYPE": "float"}))
|
||||
tasks.append(string_to_spv("rms_norm_f32", f"{generic_head}\n{shader_f32}\n{rms_norm_body}", {"A_TYPE": "float", "D_TYPE": "float"}))
|
||||
|
||||
tasks.append(string_to_spv("cpy_f32_f32", f"{generic_unary_op_head}\n{cpy_end}", {"A_TYPE": "float", "D_TYPE": "float"}))
|
||||
tasks.append(string_to_spv("cpy_f32_f16", f"{generic_unary_op_head}\n{cpy_end}", {"A_TYPE": "float", "D_TYPE": "float16_t"}))
|
||||
tasks.append(string_to_spv("cpy_f16_f16", f"{generic_unary_op_head}\n{cpy_f16_f16_end}", {"A_TYPE": "float16_t", "D_TYPE": "float16_t"}))
|
||||
tasks.append(string_to_spv("cpy_f32_f32", f"{generic_unary_op_combined}\n{cpy_end}", {"A_TYPE": "float", "D_TYPE": "float"}))
|
||||
tasks.append(string_to_spv("cpy_f32_f16", f"{generic_unary_op_combined}\n{cpy_end}", {"A_TYPE": "float", "D_TYPE": "float16_t"}))
|
||||
tasks.append(string_to_spv("cpy_f16_f16", f"{generic_unary_op_combined}\n{cpy_f16_f16_end}", {"A_TYPE": "float16_t", "D_TYPE": "float16_t"}))
|
||||
|
||||
tasks.append(string_to_spv("add_f32", f"{generic_binary_op_head}\n{add_body}", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
|
||||
tasks.append(string_to_spv("add_f32", f"{generic_binary_op_combined}\n{add_body}", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
|
||||
|
||||
tasks.append(string_to_spv("split_k_reduce", mulmat_split_k_reduce_src, {}))
|
||||
tasks.append(string_to_spv("mul_f32", f"{generic_binary_op_head}\n{mul_body}", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
|
||||
tasks.append(string_to_spv("mul_f32", f"{generic_binary_op_combined}\n{mul_body}", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
|
||||
|
||||
tasks.append(string_to_spv("scale_f32", f"{generic_unary_op_head}\n{scale_body}", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
|
||||
tasks.append(string_to_spv("scale_f32", f"{generic_unary_op_combined}\n{scale_body}", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
|
||||
|
||||
tasks.append(string_to_spv("sqr_f32", f"{generic_unary_op_head}\n{sqr_body}", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
|
||||
tasks.append(string_to_spv("sqr_f32", f"{generic_unary_op_combined}\n{sqr_body}", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
|
||||
|
||||
tasks.append(string_to_spv("clamp_f32", f"{generic_unary_op_head}\n{clamp_body}", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
|
||||
tasks.append(string_to_spv("clamp_f32", f"{generic_unary_op_combined}\n{clamp_body}", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
|
||||
|
||||
tasks.append(string_to_spv("gelu_f32", f"{generic_head}\n{shader_f32}\n{gelu_body}", {"A_TYPE": "float", "D_TYPE": "float"}))
|
||||
tasks.append(string_to_spv("silu_f32", f"{generic_head}\n{shader_f32}\n{silu_body}", {"A_TYPE": "float", "D_TYPE": "float"}))
|
||||
|
||||
@@ -123,6 +123,7 @@ class MODEL_ARCH(IntEnum):
|
||||
GEMMA = auto()
|
||||
STARCODER2 = auto()
|
||||
MAMBA = auto()
|
||||
XVERSE = auto()
|
||||
COMMAND_R = auto()
|
||||
|
||||
|
||||
@@ -191,6 +192,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
||||
MODEL_ARCH.GEMMA: "gemma",
|
||||
MODEL_ARCH.STARCODER2: "starcoder2",
|
||||
MODEL_ARCH.MAMBA: "mamba",
|
||||
MODEL_ARCH.XVERSE: "xverse",
|
||||
MODEL_ARCH.COMMAND_R: "command-r",
|
||||
}
|
||||
|
||||
@@ -606,6 +608,22 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||
MODEL_TENSOR.SSM_D,
|
||||
MODEL_TENSOR.SSM_OUT,
|
||||
],
|
||||
MODEL_ARCH.XVERSE: [
|
||||
MODEL_TENSOR.TOKEN_EMBD,
|
||||
MODEL_TENSOR.OUTPUT_NORM,
|
||||
MODEL_TENSOR.OUTPUT,
|
||||
MODEL_TENSOR.ROPE_FREQS,
|
||||
MODEL_TENSOR.ATTN_NORM,
|
||||
MODEL_TENSOR.ATTN_Q,
|
||||
MODEL_TENSOR.ATTN_K,
|
||||
MODEL_TENSOR.ATTN_V,
|
||||
MODEL_TENSOR.ATTN_OUT,
|
||||
MODEL_TENSOR.ATTN_ROT_EMBD,
|
||||
MODEL_TENSOR.FFN_NORM,
|
||||
MODEL_TENSOR.FFN_GATE,
|
||||
MODEL_TENSOR.FFN_DOWN,
|
||||
MODEL_TENSOR.FFN_UP,
|
||||
],
|
||||
MODEL_ARCH.COMMAND_R: [
|
||||
MODEL_TENSOR.TOKEN_EMBD,
|
||||
MODEL_TENSOR.OUTPUT_NORM,
|
||||
@@ -650,6 +668,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||
MODEL_TENSOR.ROPE_FREQS,
|
||||
MODEL_TENSOR.ATTN_ROT_EMBD,
|
||||
],
|
||||
MODEL_ARCH.XVERSE: [
|
||||
MODEL_TENSOR.ROPE_FREQS,
|
||||
MODEL_TENSOR.ATTN_ROT_EMBD,
|
||||
],
|
||||
}
|
||||
|
||||
#
|
||||
|
||||
184
llama.cpp
184
llama.cpp
@@ -218,6 +218,7 @@ enum llm_arch {
|
||||
LLM_ARCH_GEMMA,
|
||||
LLM_ARCH_STARCODER2,
|
||||
LLM_ARCH_MAMBA,
|
||||
LLM_ARCH_XVERSE,
|
||||
LLM_ARCH_COMMAND_R,
|
||||
LLM_ARCH_UNKNOWN,
|
||||
};
|
||||
@@ -249,6 +250,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
||||
{ LLM_ARCH_GEMMA, "gemma" },
|
||||
{ LLM_ARCH_STARCODER2, "starcoder2" },
|
||||
{ LLM_ARCH_MAMBA, "mamba" },
|
||||
{ LLM_ARCH_XVERSE, "xverse" },
|
||||
{ LLM_ARCH_COMMAND_R, "command-r" },
|
||||
{ LLM_ARCH_UNKNOWN, "(unknown)" },
|
||||
};
|
||||
@@ -878,6 +880,25 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
|
||||
{ LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
|
||||
},
|
||||
},
|
||||
{
|
||||
LLM_ARCH_XVERSE,
|
||||
{
|
||||
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
||||
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
||||
{ LLM_TENSOR_OUTPUT, "output" },
|
||||
{ LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
|
||||
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
||||
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
||||
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
||||
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
||||
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
||||
{ LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
|
||||
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
||||
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
||||
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
||||
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
||||
},
|
||||
},
|
||||
{
|
||||
LLM_ARCH_COMMAND_R,
|
||||
{
|
||||
@@ -2100,10 +2121,6 @@ struct llama_context {
|
||||
ggml_backend_free(backend);
|
||||
}
|
||||
|
||||
#ifdef GGML_USE_VULKAN
|
||||
ggml_vk_free_cpu_assist();
|
||||
#endif
|
||||
|
||||
ggml_backend_buffer_free(buf_output);
|
||||
}
|
||||
|
||||
@@ -3847,6 +3864,16 @@ static void llm_load_hparams(
|
||||
default: model.type = e_model::MODEL_UNKNOWN;
|
||||
}
|
||||
} break;
|
||||
case LLM_ARCH_XVERSE:
|
||||
{
|
||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
||||
switch (hparams.n_layer) {
|
||||
case 32: model.type = e_model::MODEL_7B; break;
|
||||
case 40: model.type = e_model::MODEL_13B; break;
|
||||
case 80: model.type = e_model::MODEL_65B; break;
|
||||
default: model.type = e_model::MODEL_UNKNOWN;
|
||||
}
|
||||
} break;
|
||||
case LLM_ARCH_COMMAND_R:
|
||||
{
|
||||
ml.get_key(LLM_KV_LOGIT_SCALE, hparams.f_logit_scale);
|
||||
@@ -5200,6 +5227,28 @@ static bool llm_load_tensors(
|
||||
layer.ssm_out = ml.create_tensor(ctx_split, tn(LLM_TENSOR_SSM_OUT, "weight", i), {d_inner, n_embd});
|
||||
}
|
||||
} break;
|
||||
case LLM_ARCH_XVERSE:
|
||||
{
|
||||
model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
|
||||
{
|
||||
model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
|
||||
model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab});
|
||||
}
|
||||
for (int i = 0; i < n_layer; ++i) {
|
||||
ggml_context * ctx_layer = ctx_for_layer(i);
|
||||
ggml_context * ctx_split = ctx_for_layer_split(i);
|
||||
auto & layer = model.layers[i];
|
||||
layer.attn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd});
|
||||
layer.wq = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd});
|
||||
layer.wk = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_gqa});
|
||||
layer.wv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_gqa});
|
||||
layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd});
|
||||
layer.ffn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd});
|
||||
layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
|
||||
layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd});
|
||||
layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});
|
||||
}
|
||||
} break;
|
||||
case LLM_ARCH_COMMAND_R:
|
||||
{
|
||||
model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
|
||||
@@ -5238,7 +5287,7 @@ static bool llm_load_tensors(
|
||||
|
||||
ml.done_getting_tensors();
|
||||
|
||||
ml.init_mappings(true, &model.mlock_mmaps);
|
||||
ml.init_mappings(true, use_mlock ? &model.mlock_mmaps : nullptr);
|
||||
model.mappings.reserve(ml.mappings.size());
|
||||
|
||||
// create the backend buffers
|
||||
@@ -6411,6 +6460,111 @@ struct llm_build_context {
|
||||
return gf;
|
||||
}
|
||||
|
||||
struct ggml_cgraph * build_xverse() {
|
||||
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
|
||||
|
||||
const int64_t n_embd_head = hparams.n_embd_head_v;
|
||||
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
||||
GGML_ASSERT(n_embd_head == hparams.n_rot);
|
||||
|
||||
struct ggml_tensor * cur;
|
||||
struct ggml_tensor * inpL;
|
||||
|
||||
inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb);
|
||||
|
||||
// inp_pos - contains the positions
|
||||
struct ggml_tensor * inp_pos = build_inp_pos();
|
||||
|
||||
// KQ_mask (mask for 1 head, it will be broadcasted to all heads)
|
||||
struct ggml_tensor * KQ_mask = build_inp_KQ_mask();
|
||||
|
||||
// positions of the tokens in the KV cache
|
||||
struct ggml_tensor * KQ_pos = build_inp_KQ_pos();
|
||||
|
||||
for (int il = 0; il < n_layer; ++il) {
|
||||
struct ggml_tensor * inpSA = inpL;
|
||||
|
||||
cur = llm_build_norm(ctx0, inpL, hparams,
|
||||
model.layers[il].attn_norm, NULL,
|
||||
LLM_NORM_RMS, cb, il);
|
||||
cb(cur, "attn_norm", il);
|
||||
|
||||
// self-attention
|
||||
{
|
||||
struct ggml_tensor * Qcur = ggml_mul_mat(ctx0, model.layers[il].wq, cur);
|
||||
cb(Qcur, "Qcur", il);
|
||||
|
||||
struct ggml_tensor * Kcur = ggml_mul_mat(ctx0, model.layers[il].wk, cur);
|
||||
cb(Kcur, "Kcur", il);
|
||||
|
||||
struct ggml_tensor * Vcur = ggml_mul_mat(ctx0, model.layers[il].wv, cur);
|
||||
cb(Vcur, "Vcur", il);
|
||||
|
||||
Qcur = ggml_rope_custom(
|
||||
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
|
||||
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
||||
ext_factor, attn_factor, beta_fast, beta_slow
|
||||
);
|
||||
cb(Qcur, "Qcur", il);
|
||||
|
||||
Kcur = ggml_rope_custom(
|
||||
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
||||
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
||||
ext_factor, attn_factor, beta_fast, beta_slow
|
||||
);
|
||||
cb(Kcur, "Kcur", il);
|
||||
cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
|
||||
model.layers[il].wo, NULL,
|
||||
Kcur, Vcur, Qcur, KQ_mask, KQ_pos, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il);
|
||||
}
|
||||
|
||||
if (il == n_layer - 1) {
|
||||
// skip computing output for unused tokens
|
||||
struct ggml_tensor * inp_out_ids = build_inp_out_ids();
|
||||
cur = ggml_get_rows(ctx0, cur, inp_out_ids);
|
||||
inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
|
||||
cb(ffn_inp, "ffn_inp", il);
|
||||
|
||||
// feed-forward network
|
||||
{
|
||||
cur = llm_build_norm(ctx0, ffn_inp, hparams,
|
||||
model.layers[il].ffn_norm, NULL,
|
||||
LLM_NORM_RMS, cb, il);
|
||||
cb(cur, "ffn_norm", il);
|
||||
|
||||
cur = llm_build_ffn(ctx0, cur,
|
||||
model.layers[il].ffn_up, NULL,
|
||||
model.layers[il].ffn_gate, NULL,
|
||||
model.layers[il].ffn_down, NULL,
|
||||
NULL,
|
||||
LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
|
||||
cb(cur, "ffn_out", il);
|
||||
}
|
||||
|
||||
cur = ggml_add(ctx0, cur, ffn_inp);
|
||||
cb(cur, "l_out", il);
|
||||
|
||||
// input for next layer
|
||||
inpL = cur;
|
||||
}
|
||||
|
||||
cur = inpL;
|
||||
|
||||
cur = llm_build_norm(ctx0, cur, hparams, model.output_norm, NULL, LLM_NORM_RMS, cb, -1);
|
||||
cb(cur, "result_norm", -1);
|
||||
|
||||
// lm_head
|
||||
cur = ggml_mul_mat(ctx0, model.output, cur);
|
||||
cb(cur, "result_output", -1);
|
||||
|
||||
ggml_build_forward_expand(gf, cur);
|
||||
|
||||
return gf;
|
||||
}
|
||||
|
||||
struct ggml_cgraph * build_falcon() {
|
||||
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
|
||||
|
||||
@@ -9389,6 +9543,10 @@ static struct ggml_cgraph * llama_build_graph(
|
||||
{
|
||||
result = llm.build_mamba();
|
||||
} break;
|
||||
case LLM_ARCH_XVERSE:
|
||||
{
|
||||
result = llm.build_xverse();
|
||||
} break;
|
||||
case LLM_ARCH_COMMAND_R:
|
||||
{
|
||||
result = llm.build_command_r();
|
||||
@@ -13969,7 +14127,20 @@ struct llama_context * llama_new_context_with_model(
|
||||
}
|
||||
}
|
||||
#elif defined(GGML_USE_VULKAN)
|
||||
if (model->n_gpu_layers > 0) {
|
||||
if (model->split_mode == LLAMA_SPLIT_MODE_ROW) {
|
||||
LLAMA_LOG_ERROR("%s: Row split not supported. Failed to initialize Vulkan backend\n", __func__);
|
||||
llama_free(ctx);
|
||||
return nullptr;
|
||||
}
|
||||
if (model->split_mode == LLAMA_SPLIT_MODE_NONE) {
|
||||
ggml_backend_t backend = ggml_backend_vk_init(0);
|
||||
if (backend == nullptr) {
|
||||
LLAMA_LOG_ERROR("%s: failed to initialize Vulkan backend\n", __func__);
|
||||
llama_free(ctx);
|
||||
return nullptr;
|
||||
}
|
||||
ctx->backends.push_back(backend);
|
||||
} else {
|
||||
for (int device = 0; device < ggml_backend_vk_get_device_count(); ++device) {
|
||||
ggml_backend_t backend = ggml_backend_vk_init(device);
|
||||
if (backend == nullptr) {
|
||||
@@ -14188,6 +14359,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
|
||||
case LLM_ARCH_ORION:
|
||||
case LLM_ARCH_INTERNLM2:
|
||||
case LLM_ARCH_MINICPM:
|
||||
case LLM_ARCH_XVERSE:
|
||||
case LLM_ARCH_COMMAND_R:
|
||||
return LLAMA_ROPE_TYPE_NORM;
|
||||
|
||||
|
||||
9
scripts/gen-authors.sh
Executable file
9
scripts/gen-authors.sh
Executable file
@@ -0,0 +1,9 @@
|
||||
#!/bin/bash
|
||||
|
||||
printf "# date: $(date)\n" > AUTHORS
|
||||
printf "# this file is auto-generated by scripts/gen-authors.sh\n\n" >> AUTHORS
|
||||
|
||||
git log --format='%an <%ae>' --reverse --date=short master | awk '!seen[$0]++' | sort >> AUTHORS
|
||||
|
||||
# if necessary, update your name here. for example: jdoe -> John Doe
|
||||
sed -i '' 's/^jdoe/John Doe/g' AUTHORS
|
||||
@@ -65,6 +65,8 @@ while read c; do
|
||||
tests/test-quantize-fns.cpp \
|
||||
tests/test-quantize-perf.cpp \
|
||||
tests/test-backend-ops.cpp \
|
||||
LICENSE \
|
||||
scripts/gen-authors.sh \
|
||||
>> $SRC_LLAMA/ggml-src.patch
|
||||
done < $SRC_LLAMA/ggml-commits
|
||||
|
||||
@@ -95,6 +97,7 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
|
||||
# src/ggml-backend-impl.h -> ggml-backend-impl.h
|
||||
# src/ggml-backend.c -> ggml-backend.c
|
||||
# src/ggml-common.h -> ggml-common.h
|
||||
# src/ggml-cuda/* -> ggml-cuda/
|
||||
# src/ggml-cuda.cu -> ggml-cuda.cu
|
||||
# src/ggml-cuda.h -> ggml-cuda.h
|
||||
# src/ggml-impl.h -> ggml-impl.h
|
||||
@@ -121,6 +124,9 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
|
||||
# tests/test-quantize-fns.cpp -> tests/test-quantize-fns.cpp
|
||||
# tests/test-quantize-perf.cpp -> tests/test-quantize-perf.cpp
|
||||
# tests/test-backend-ops.cpp -> tests/test-backend-ops.cpp
|
||||
#
|
||||
# LICENSE -> LICENSE
|
||||
# scripts/gen-authors.sh -> scripts/gen-authors.sh
|
||||
|
||||
cat ggml-src.patch | sed \
|
||||
-e 's/src\/ggml\.c/ggml.c/g' \
|
||||
@@ -128,6 +134,7 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
|
||||
-e 's/src\/ggml-backend-impl\.h/ggml-backend-impl.h/g' \
|
||||
-e 's/src\/ggml-backend\.c/ggml-backend.c/g' \
|
||||
-e 's/src\/ggml-common\.h/ggml-common.h/g' \
|
||||
-e 's/src\/ggml-cuda\//ggml-cuda\//g' \
|
||||
-e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \
|
||||
-e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \
|
||||
-e 's/src\/ggml-impl\.h/ggml-impl.h/g' \
|
||||
@@ -153,6 +160,8 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
|
||||
-e 's/tests\/test-quantize-fns\.cpp/tests\/test-quantize-fns.cpp/g' \
|
||||
-e 's/tests\/test-quantize-perf\.cpp/tests\/test-quantize-perf.cpp/g' \
|
||||
-e 's/tests\/test-backend-ops\.cpp/tests\/test-backend-ops.cpp/g' \
|
||||
-e 's/LICENSE/LICENSE/g' \
|
||||
-e 's/scripts\/gen-authors\.sh/scripts\/gen-authors.sh/g' \
|
||||
> ggml-src.patch.tmp
|
||||
mv ggml-src.patch.tmp ggml-src.patch
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ cp -rpv ../ggml/src/ggml-alloc.c ./ggml-alloc.c
|
||||
cp -rpv ../ggml/src/ggml-backend-impl.h ./ggml-backend-impl.h
|
||||
cp -rpv ../ggml/src/ggml-backend.c ./ggml-backend.c
|
||||
cp -rpv ../ggml/src/ggml-common.h ./ggml-common.h
|
||||
cp -rpv ../ggml/src/ggml-cuda/* ./ggml-cuda/
|
||||
cp -rpv ../ggml/src/ggml-cuda.cu ./ggml-cuda.cu
|
||||
cp -rpv ../ggml/src/ggml-cuda.h ./ggml-cuda.h
|
||||
cp -rpv ../ggml/src/ggml-impl.h ./ggml-impl.h
|
||||
@@ -30,3 +31,6 @@ cp -rpv ../ggml/include/ggml/ggml-backend.h ./ggml-backend.h
|
||||
cp -rpv ../ggml/tests/test-opt.cpp ./tests/test-opt.cpp
|
||||
cp -rpv ../ggml/tests/test-grad0.cpp ./tests/test-grad0.cpp
|
||||
cp -rpv ../ggml/tests/test-backend-ops.cpp ./tests/test-backend-ops.cpp
|
||||
|
||||
cp -rpv ../LICENSE ./LICENSE
|
||||
cp -rpv ../ggml/scripts/gen-authors.sh ./scripts/gen-authors.sh
|
||||
|
||||
Reference in New Issue
Block a user